In this HackerRank HTML Parser - Part 1 problem, you are given an HTML code snippet of N lines. Your task is to print the start tags, end tags, and empty tags separately.

HackerRank HTML Parser - Part 1 in Python problem solution

HackerRank HTML Parser - Part 1 in Python problem solution

import re

# Either an opening/self-closing tag (group 1 = name, group 2 = raw attribute
# text up to the closing '>') or a closing tag (group 3 = name).
r_tag = re.compile(r'<\s*(\w+)\s*([^>]*)>|<\s*\/\s*(\w+)\s*>')
# A trailing '/' in the attribute text marks a self-closing ("empty") tag.
r_empty = re.compile(r'/\s*$')
# One attribute: group 1 = name, group 2 = double-quoted value, group 3 =
# single-quoted value.  The name stops at ANY whitespace (not just a space) so
# attributes separated by tabs or newlines are not fused together, and quoted
# values may be empty.
r_attr = re.compile(
    r'\s*([^\s=]+)(?:\s*=\s*(?:(?:"([^"]*)")|(?:\'([^\']*)\')))?')
# HTML comments; DOTALL so that a comment may span several lines.
r_comment = re.compile(r'<!--.*?-->', re.S)


def describe_tags(html):
    """Return the report lines for every tag found in *html*.

    Comments are removed first.  Each tag then yields one line of the form
    ``Start : name``, ``Empty : name`` or ``End   : name``; start and empty
    tags are followed by one ``-> attr > value`` line per attribute, where
    the value is the literal text ``None`` for attributes without a value.

    Args:
        html: The HTML snippet as a single string (may contain newlines).

    Returns:
        A list of output lines, in document order, without line terminators.
    """
    lines = []
    # All comments are removed: the compiled pattern carries the DOTALL flag
    # and no count limit is applied.
    for match in r_tag.finditer(r_comment.sub('', html)):
        start_tag, attrs, end_tag = match.groups()
        if start_tag is None:
            # Second alternative of r_tag matched: a closing tag.
            lines.append('End  ' + ' : ' + end_tag)
            continue

        status = 'Empty' if r_empty.search(attrs) else 'Start'
        lines.append(status + ' : ' + start_tag)

        # Drop the self-closing slash so it is not reported as an attribute.
        for attr in r_attr.finditer(r_empty.sub('', attrs)):
            name, double_quoted, single_quoted = attr.groups()
            value = double_quoted if double_quoted is not None else single_quoted
            # Compare with None explicitly so an empty quoted value ("")
            # is printed as empty text rather than as a missing value.
            lines.append('-> ' + name + ' > ' + ('None' if value is None else value))
    return lines


def main():
    """Read the line count and the HTML lines from stdin and print the report."""
    import sys

    first = sys.stdin.readline().strip()
    count = int(first) if first else 0
    # Keep the line breaks between input lines: joining with '' would glue a
    # tag name at the end of one line to the attribute starting the next.
    html = '\n'.join(sys.stdin.readline().rstrip('\r\n') for _ in range(count))
    for line in describe_tags(html):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(line)


if __name__ == '__main__':
    main()