ó
§l§hc           @   s)  d  Z  d d l Z d d l Z e j d ƒ Z e j d ƒ Z e j d ƒ Z e j d ƒ Z e j d ƒ Z e j d ƒ Z	 e j d	 ƒ Z
 e j d
 ƒ Z e j d ƒ Z e j d ƒ Z e j d ƒ Z e j d e j ƒ Z e j d	 ƒ Z e j d ƒ Z d e f d „  ƒ  YZ d e j f d „  ƒ  YZ d S(   s   A parser for HTML and XHTML.iÿÿÿÿNs   [&<]s
   &[a-zA-Z#]s%   &([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]s)   &#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]s	   <[a-zA-Z]s
   </[a-zA-Z]t   >s   --\s*>s$   ([a-zA-Z][^	
 /> ]*)(?:\s|/(?!>))*s   [a-zA-Z][^	
 /> ]*s]   ((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*s  
  <[a-zA-Z][^\t\n\r\f />\x00]*       # tag name
  (?:[\s/]*                          # optional whitespace before attribute name
    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
      (?:\s*=+\s*                    # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |"[^"]*"                   # LIT-enclosed value
          |(?!['"])[^>\s]*           # bare value
         )
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                # trailing whitespace
s#   </\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>t   HTMLParseErrorc           B   s#   e  Z d  Z d d „ Z d „  Z RS(   s&   Exception raised for all parse errors.c         C   s3   | s t  ‚ | |  _ | d |  _ | d |  _ d  S(   Ni    i   (   t   AssertionErrort   msgt   linenot   offset(   t   selfR   t   position(    (    s    /usr/lib/python2.7/HTMLParser.pyt   __init__=   s    	c         C   sW   |  j  } |  j d  k	 r, | d |  j } n  |  j d  k	 rS | d |  j d } n  | S(   Ns   , at line %ds   , column %di   (   R   R   t   NoneR   (   R   t   result(    (    s    /usr/lib/python2.7/HTMLParser.pyt   __str__C   s    	N(   NN(   t   __name__t
   __module__t   __doc__R	   R   R   (    (    (    s    /usr/lib/python2.7/HTMLParser.pyR   :   s   t
   HTMLParserc           B   s  e  Z d  Z d Z d „  Z d „  Z d „  Z d „  Z d „  Z d Z
 d „  Z d	 „  Z d
 „  Z d „  Z d „  Z d d „ Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d Z d „  Z  RS(    sÇ  Find tags and other markup and call handler functions.

    Usage:
        p = HTMLParser()
        p.feed(data)
        ...
        p.close()

    Start tags are handled by calling self.handle_starttag() or
    self.handle_startendtag(); end tags by self.handle_endtag().  The
    data between tags is passed from the parser to the derived class
    by calling self.handle_data() with the data as argument (the data
    may be split up in arbitrary chunks).  Entity references are
    passed by calling self.handle_entityref() with the entity
    reference as the argument.  Numeric character references are
    passed to self.handle_charref() with the string containing the
    reference as the argument.
    t   scriptt   stylec         C   s   |  j  ƒ  d S(   s#   Initialize and reset this instance.N(   t   reset(   R   (    (    s    /usr/lib/python2.7/HTMLParser.pyR   c   s    c         C   s8   d |  _  d |  _ t |  _ d |  _ t j j |  ƒ d S(   s1   Reset this instance.  Loses all unprocessed data.t    s   ???N(	   t   rawdatat   lasttagt   interesting_normalt   interestingR	   t
   cdata_elemt
   markupbaset
   ParserBaseR   (   R   (    (    s    /usr/lib/python2.7/HTMLParser.pyR   g   s
    				c         C   s!   |  j  | |  _  |  j d ƒ d S(   s‘   Feed data to the parser.

        Call this as often as you want, with as little or as much text
        as you want (may include '\n').
        i    N(   R   t   goahead(   R   t   data(    (    s    /usr/lib/python2.7/HTMLParser.pyt   feedo   s    c         C   s   |  j  d ƒ d S(   s   Handle any buffered data.i   N(   R   (   R   (    (    s    /usr/lib/python2.7/HTMLParser.pyt   closex   s    c         C   s   t  | |  j ƒ  ƒ ‚ d  S(   N(   R   t   getpos(   R   t   message(    (    s    /usr/lib/python2.7/HTMLParser.pyt   error|   s    c         C   s   |  j  S(   s)   Return full source of start tag: '<...>'.(   t   _HTMLParser__starttag_text(   R   (    (    s    /usr/lib/python2.7/HTMLParser.pyt   get_starttag_text   s    c         C   s2   | j  ƒ  |  _ t j d |  j t j ƒ |  _ d  S(   Ns   </\s*%s\s*>(   t   lowerR   t   ret   compilet   IR   (   R   t   elem(    (    s    /usr/lib/python2.7/HTMLParser.pyt   set_cdata_mode…   s    c         C   s   t  |  _ d  |  _ d  S(   N(   R   R   R	   R   (   R   (    (    s    /usr/lib/python2.7/HTMLParser.pyt   clear_cdata_mode‰   s    	c         C   s¨  |  j  } d } t | ƒ } x4| | k  rQ|  j j | | ƒ } | rT | j ƒ  } n |  j ra Pn  | } | | k  rŠ |  j | | | !ƒ n  |  j | | ƒ } | | k r¬ Pn  | j } | d | ƒ rct	 j
 | | ƒ rè |  j | ƒ } nµ | d | ƒ r	|  j | ƒ } n” | d | ƒ r*|  j | ƒ } ns | d | ƒ rK|  j | ƒ } nR | d | ƒ rl|  j | ƒ } n1 | d | k  s‚| rœ|  j d ƒ | d } n P| d k  rN| s³Pn  t	 j
 | | ƒ rÈn}| d | ƒ r$| d | k r÷|  j d ƒ qEt j
 | | ƒ rqE|  j | | d ƒ n!| d | ƒ r| } x8 d D]0 }	 | j |	 | d ƒ r@| t |	 ƒ 8} Pq@q@W|  j | | d | !ƒ n¶ | d | ƒ r¶|  j | | d ƒ n | | | d !j ƒ  d k rë|  j | | d ƒ nZ | d | ƒ r|  j | | d ƒ n3 | d | ƒ rE|  j | | d ƒ t d ƒ ‚ n  | } n  |  j | | ƒ } q | d | ƒ r3t j
 | | ƒ } | rî| j ƒ  d d !}
 |  j |
 ƒ | j ƒ  } | d | d ƒ sÖ| d } n  |  j | | ƒ } q qNd | | k r/|  j | | | d !ƒ |  j | | d ƒ } n  Pq | d | ƒ r<t j
 | | ƒ } | rº| j d ƒ }
 |  j |
 ƒ | j ƒ  } | d | d ƒ s¢| d } n  |  j | | ƒ } q n  t j
 | | ƒ } | r| rþ| j ƒ  | | k rþ|  j d ƒ n  PqN| d | k  r8|  j d ƒ |  j | | d ƒ } qNPq d s t d ƒ ‚ q W| r—| | k  r—|  j r—|  j | | | !ƒ |  j | | ƒ } n  | | |  _  d  S(   Ni    t   <s   </s   <!--s   <?s   <!i   i   s   --!s   --t   -i   s	   <![CDATA[i   i	   s	   <!doctypes   we should not get here!s   &#iÿÿÿÿt   ;t   &s#   EOF in middle of entity or char refs   interesting.search() lied(   s   --!s   --R,   (    R   t   lenR   t   searcht   startR   t   handle_datat	   updatepost
   startswitht   starttagopent   matcht   parse_starttagt   parse_endtagt   parse_commentt   parse_pit   parse_html_declarationt
   endtagopent   handle_commentt   endswitht   unknown_declR$   t   handle_declt	   handle_piR   t   charreft   groupt   handle_charreft   endt	   entityreft   handle_entityreft
   incompleteR!   (   R   RE   R   t   it   nR6   t   jR4   t   kt   suffixt   name(    (    s    /usr/lib/python2.7/HTMLParser.pyR      sÄ    		  		c         C   sì   |  j  } | | | d !d k r0 |  j d ƒ n  | | | d !d k rT |  j | ƒ S| | | d !d k rx |  j | ƒ S| | | d !j ƒ  d	 k rÛ | j d
 | d ƒ } | d k r» d S|  j | | d | !ƒ | d S|  j | ƒ Sd  S(   Ni   s   <!s+   unexpected call to parse_html_declaration()i   s   <!--i   s   <![i	   s	   <!doctypeR    iÿÿÿÿi   (   R   R!   R9   t   parse_marked_sectionR$   t   findR@   t   parse_bogus_comment(   R   RI   R   t   gtpos(    (    s    /usr/lib/python2.7/HTMLParser.pyR;     s    	i   c         C   s   |  j  } | | | d !d k r0 |  j d ƒ n  | j d | d ƒ } | d k rV d S| rw |  j | | d | !ƒ n  | d S(	   Ni   s   <!s   </s"   unexpected call to parse_comment()R    iÿÿÿÿi   (   s   <!s   </(   R   R!   RP   R=   (   R   RI   t   reportR   t   pos(    (    s    /usr/lib/python2.7/HTMLParser.pyRQ     s    	c         C   s€   |  j  } | | | d !d k s, t d ƒ ‚ t j | | d ƒ } | sL d S| j ƒ  } |  j | | d | !ƒ | j ƒ  } | S(   Ni   s   <?s   unexpected call to parse_pi()iÿÿÿÿ(   R   R   t   picloseR0   R1   RA   RE   (   R   RI   R   R6   RK   (    (    s    /usr/lib/python2.7/HTMLParser.pyR:   #  s    	#c         C   s€  d  |  _ |  j | ƒ } | d k  r( | S|  j } | | | !|  _ g  } t j | | d ƒ } | so t d ƒ ‚ | j ƒ  } | j d ƒ j	 ƒ  |  _
 } xî | | k  r‡t j | | ƒ } | sÂ Pn  | j d d d ƒ \ }	 }
 } |
 sï d  } nX | d  d k o| d k n s7| d  d k o2| d k n rG| d d !} n  | r_|  j | ƒ } n  | j |	 j	 ƒ  | f ƒ | j ƒ  } qš W| | | !j ƒ  } | d k r+|  j ƒ  \ } } d |  j k r | |  j j d ƒ } t |  j ƒ |  j j d ƒ } n | t |  j ƒ } |  j | | | !ƒ | S| j d
 ƒ rM|  j | | ƒ n/ |  j | | ƒ | |  j k r||  j | ƒ n  | S(   Ni    i   s#   unexpected call to parse_starttag()i   i   s   'iÿÿÿÿt   "R    s   />s   
(   R    s   />(   R	   R"   t   check_for_whole_start_tagR   t   tagfindR6   R   RE   RC   R$   R   t   attrfindt   unescapet   appendt   stripR   t   countR/   t   rfindR2   R>   t   handle_startendtagt   handle_starttagt   CDATA_CONTENT_ELEMENTSR)   (   R   RI   t   endposR   t   attrsR6   RL   t   tagt   mt   attrnamet   restt	   attrvalueRE   R   R   (    (    s    /usr/lib/python2.7/HTMLParser.pyR7   /  sR    			$$c         C   sý   |  j  } t j | | ƒ } | rí | j ƒ  } | | | d !} | d k rR | d S| d k r² | j d | ƒ rx | d S| j d | ƒ rŽ d S|  j | | d ƒ |  j d ƒ n  | d k rÂ d S| d	 k rÒ d S| | k râ | S| d Sn  t d
 ƒ ‚ d  S(   Ni   R    t   /s   />i   iÿÿÿÿs   malformed empty start tagR   s6   abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZs   we should not get here!(   R   t   locatestarttagendR6   RE   R4   R3   R!   R   (   R   RI   R   Re   RK   t   next(    (    s    /usr/lib/python2.7/HTMLParser.pyRW   c  s,    	c         C   s  |  j  } | | | d !d k s, t d ƒ ‚ t j | | d ƒ } | sL d S| j ƒ  } t j | | ƒ } | s$|  j d  k	 r— |  j	 | | | !ƒ | St
 j | | d ƒ } | sâ | | | d !d k rÒ | d S|  j | ƒ Sn  | j d ƒ j ƒ  } | j d | j ƒ  ƒ } |  j | ƒ | d S| j d ƒ j ƒ  } |  j d  k	 rr| |  j k rr|  j	 | | | !ƒ | Sn  |  j | ƒ |  j ƒ  | S(	   Ni   s   </s   unexpected call to parse_endtagi   iÿÿÿÿi   s   </>R    (   R   R   t	   endendtagR0   RE   t
   endtagfindR6   R   R	   R2   RX   RQ   RC   R$   RP   t   handle_endtagR*   (   R   RI   R   R6   RR   t	   namematcht   tagnameR(   (    (    s    /usr/lib/python2.7/HTMLParser.pyR8   ƒ  s8    	#
c         C   s!   |  j  | | ƒ |  j | ƒ d  S(   N(   R`   Rn   (   R   Rd   Rc   (    (    s    /usr/lib/python2.7/HTMLParser.pyR_   «  s    c         C   s   d  S(   N(    (   R   Rd   Rc   (    (    s    /usr/lib/python2.7/HTMLParser.pyR`   °  s    c         C   s   d  S(   N(    (   R   Rd   (    (    s    /usr/lib/python2.7/HTMLParser.pyRn   ´  s    c         C   s   d  S(   N(    (   R   RN   (    (    s    /usr/lib/python2.7/HTMLParser.pyRD   ¸  s    c         C   s   d  S(   N(    (   R   RN   (    (    s    /usr/lib/python2.7/HTMLParser.pyRG   ¼  s    c         C   s   d  S(   N(    (   R   R   (    (    s    /usr/lib/python2.7/HTMLParser.pyR2   À  s    c         C   s   d  S(   N(    (   R   R   (    (    s    /usr/lib/python2.7/HTMLParser.pyR=   Ä  s    c         C   s   d  S(   N(    (   R   t   decl(    (    s    /usr/lib/python2.7/HTMLParser.pyR@   È  s    c         C   s   d  S(   N(    (   R   R   (    (    s    /usr/lib/python2.7/HTMLParser.pyRA   Ì  s    c         C   s   d  S(   N(    (   R   R   (    (    s    /usr/lib/python2.7/HTMLParser.pyR?   Ï  s    c            s2   d | k r | S‡  f d †  } t  j d | | ƒ S(   NR.   c            s  |  j  ƒ  d }  yZ |  d d k ri |  d }  |  d d k rS t |  d d ƒ } n t |  ƒ } t | ƒ SWn t k
 r† d |  d SXt j d  k rë d	 d  l } i d
 d 6} x- | j j	 ƒ  D] \ } } t | ƒ | | <q¿ W| t _ n  y ˆ  j |  SWn t
 k
 rd |  d SXd  S(   Ni    t   #i   t   xt   Xi   s   &#R-   iÿÿÿÿu   't   aposR.   (   Rs   Rt   (   t   groupst   intt   unichrt
   ValueErrorR   t
   entitydefsR	   t   htmlentitydefst   name2codepointt	   iteritemst   KeyError(   t   st   cR{   Rz   RL   t   v(   R   (    s    /usr/lib/python2.7/HTMLParser.pyt   replaceEntities×  s(    
s#   &(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));(   R%   t   sub(   R   R   R‚   (    (   R   s    /usr/lib/python2.7/HTMLParser.pyRZ   Ô  s    (   R   R   N(!   R   R   R   Ra   R   R   R   R   R!   R	   R"   R#   R)   R*   R   R;   RQ   R:   R7   RW   R8   R_   R`   Rn   RD   RG   R2   R=   R@   RA   R?   Rz   RZ   (    (    (    s    /usr/lib/python2.7/HTMLParser.pyR   L   s<   										r			4	 	(										(   R   R   R%   R&   R   RH   RF   RB   R5   R<   RU   t   commentcloseRX   t   tagfind_tolerantRY   t   VERBOSERj   Rl   Rm   t	   ExceptionR   R   R   (    (    (    s    /usr/lib/python2.7/HTMLParser.pyt   <module>   s(   
	