# coding=utf8
try:
    # Python 3 location of the parser.
    from html.parser import HTMLParser
except ImportError:
    # Python 2 location (the module this file originally imported).
    from HTMLParser import HTMLParser


class MyHTMLParser(HTMLParser):
    """HTML parser that collects the ``title`` attribute of ``<a>`` start tags.

    After calling ``feed()``, ``self.wanted`` holds the collected ``title``
    values in the order the tags were encountered.
    """

    def __init__(self):
        # Explicit base-class call, kept as in the original so the class
        # works with either of the HTMLParser imports above.
        HTMLParser.__init__(self)
        # Accumulates the title values found by handle_starttag().
        self.wanted = []

    def attr_exists(self, attrs, attrname):
        """Return the value of the first attribute named *attrname*.

        Args:
            attrs: A list of two-element ``(name, value)`` tuples.
            attrname: The attribute name to look for.

        Returns:
            The value stored alongside the first matching name, or ``None``
            when no attribute has that name. A matching attribute whose
            stored value is itself ``None`` is therefore indistinguishable
            from a missing one.
        """
        for attr in attrs:
            if attr[0] == attrname:
                return attr[1]
        return None

    def handle_starttag(self, tag, attrs):
        """Record the ``title`` value of an ``<a>`` tag in ``self.wanted``.

        Args:
            tag: Name of the start tag being handled.
            attrs: A list of ``(name, value)`` tuples for the tag.
        """
        if tag != "a":
            return
        # Look the attribute up once and reuse the result.
        title = self.attr_exists(attrs, "title")
        if title is not None:
            self.wanted.append(title)


myhtml = MyHTMLParser()
# Feed the data, i.e. store the wanted content into ``myhtml.wanted``.
# NOTE(review): ``self`` is not defined at module level; this statement
# presumably belongs inside a method of an object that has ``home_page`` --
# confirm against the caller.
myhtml.feed(self.home_page.content)