ó
è¾bc           @   s  d  Z  d d l Z d d l Z e j d ƒ Z e j d ƒ Z e j d ƒ Z e j d ƒ Z e j d ƒ Z e j d ƒ Z	 e j d	 ƒ Z
 e j d
 ƒ Z e j d ƒ Z e j d ƒ Z e j d e j ƒ Z e j d ƒ Z e j d ƒ Z d e f d „  ƒ  YZ d e j f d „  ƒ  YZ d S(   s   A parser for HTML and XHTML.iÿÿÿÿNs   [&<]s
   &[a-zA-Z#]s%   &([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]s)   &#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]s	   <[a-zA-Z]t   >s   --\s*>s$   ([a-zA-Z][^	
 /> ]*)(?:\s|/(?!>))*s   [a-zA-Z][^	
 /> ]*s]   ((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*s  
  <[a-zA-Z][^\t\n\r\f />\x00]*       # tag name
  (?:[\s/]*                          # optional whitespace before attribute name
    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
      (?:\s*=+\s*                    # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |"[^"]*"                   # LIT-enclosed value
          |(?!['"])[^>\s]*           # bare value
         )
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                # trailing whitespace
s#   </\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>t   HTMLParseErrorc           B   s#   e  Z d  Z d d „ Z d „  Z RS(   s&   Exception raised for all parse errors.c         C   s3   | s t  ‚ | |  _ | d |  _ | d |  _ d  S(   Ni    i   (   t   AssertionErrort   msgt   linenot   offset(   t   selfR   t   position(    (    s    /usr/lib/python2.7/HTMLParser.pyt   __init__<   s    	c         C   sW   |  j  } |  j d  k	 r, | d |  j } n  |  j d  k	 rS | d |  j d } n  | S(   Ns   , at line %ds   , column %di   (   R   R   t   NoneR   (   R   t   result(    (    s    /usr/lib/python2.7/HTMLParser.pyt   __str__B   s    	N(   NN(   t   __name__t
   __module__t   __doc__R	   R   R   (    (    (    s    /usr/lib/python2.7/HTMLParser.pyR   9   s   t
   HTMLParserc           B   s  e  Z d  Z d Z d „  Z d „  Z d „  Z d „  Z d „  Z d Z
 d „  Z d	 „  Z d
 „  Z d „  Z d „  Z d d „ Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d Z d „  Z  RS(    sÇ  Find tags and other markup and call handler functions.

    Usage:
        p = HTMLParser()
        p.feed(data)
        ...
        p.close()

    Start tags are handled by calling self.handle_starttag() or
    self.handle_startendtag(); end tags by self.handle_endtag().  The
    data between tags is passed from the parser to the derived class
    by calling self.handle_data() with the data as argument (the data
    may be split up in arbitrary chunks).  Entity references are
    passed by calling self.handle_entityref() with the entity
    reference as the argument.  Numeric character references are
    passed to self.handle_charref() with the string containing the
    reference as the argument.
    t   scriptt   stylec         C   s   |  j  ƒ  d S(   s#   Initialize and reset this instance.N(   t   reset(   R   (    (    s    /usr/lib/python2.7/HTMLParser.pyR   b   s    c         C   s8   d |  _  d |  _ t |  _ d |  _ t j j |  ƒ d S(   s1   Reset this instance.  Loses all unprocessed data.t    s   ???N(	   t   rawdatat   lasttagt   interesting_normalt   interestingR	   t
   cdata_elemt
   markupbaset
   ParserBaseR   (   R   (    (    s    /usr/lib/python2.7/HTMLParser.pyR   f   s
    				c         C   s!   |  j  | |  _  |  j d ƒ d S(   s‘   Feed data to the parser.

        Call this as often as you want, with as little or as much text
        as you want (may include '\n').
        i    N(   R   t   goahead(   R   t   data(    (    s    /usr/lib/python2.7/HTMLParser.pyt   feedn   s    c         C   s   |  j  d ƒ d S(   s   Handle any buffered data.i   N(   R   (   R   (    (    s    /usr/lib/python2.7/HTMLParser.pyt   closew   s    c         C   s   t  | |  j ƒ  ƒ ‚ d  S(   N(   R   t   getpos(   R   t   message(    (    s    /usr/lib/python2.7/HTMLParser.pyt   error{   s    c         C   s   |  j  S(   s)   Return full source of start tag: '<...>'.(   t   _HTMLParser__starttag_text(   R   (    (    s    /usr/lib/python2.7/HTMLParser.pyt   get_starttag_text€   s    c         C   s2   | j  ƒ  |  _ t j d |  j t j ƒ |  _ d  S(   Ns   </\s*%s\s*>(   t   lowerR   t   ret   compilet   IR   (   R   t   elem(    (    s    /usr/lib/python2.7/HTMLParser.pyt   set_cdata_mode„   s    c         C   s   t  |  _ d  |  _ d  S(   N(   R   R   R	   R   (   R   (    (    s    /usr/lib/python2.7/HTMLParser.pyt   clear_cdata_modeˆ   s    	c   
      C   s|  |  j  } d } t | ƒ } x| | k  r%|  j j | | ƒ } | rT | j ƒ  } n |  j ra Pn  | } | | k  rŠ |  j | | | !ƒ n  |  j | | ƒ } | | k r¬ Pn  | j } | d | ƒ r7t	 j
 | | ƒ rè |  j | ƒ } n¯ | d | ƒ r	|  j | ƒ } nŽ | d | ƒ r*|  j | ƒ } nm | d | ƒ rK|  j | ƒ } nL | d | ƒ rl|  j | ƒ } n+ | d | k  r–|  j d ƒ | d } n P| d k  r"| s­Pn  | j d | d ƒ } | d k  r| j d | d ƒ } | d k  r| d } qn
 | d 7} |  j | | | !ƒ n  |  j | | ƒ } q | d	 | ƒ rt j
 | | ƒ } | rÂ| j ƒ  d
 d !}	 |  j |	 ƒ | j ƒ  } | d | d ƒ sª| d } n  |  j | | ƒ } q q"d | | k r|  j | | | d
 !ƒ |  j | | d
 ƒ } n  Pq | d | ƒ rt j
 | | ƒ } | rŽ| j d ƒ }	 |  j |	 ƒ | j ƒ  } | d | d ƒ sv| d } n  |  j | | ƒ } q n  t j
 | | ƒ } | rÖ| rÒ| j ƒ  | | k rÒ|  j d ƒ n  Pq"| d | k  r|  j d ƒ |  j | | d ƒ } q"Pq d s t d ƒ ‚ q W| rk| | k  rk|  j rk|  j | | | !ƒ |  j | | ƒ } n  | | |  _  d  S(   Ni    t   <s   </s   <!--s   <?s   <!i   R    s   &#i   iÿÿÿÿt   ;t   &s#   EOF in middle of entity or char refs   interesting.search() lied(   R   t   lenR   t   searcht   startR   t   handle_datat	   updatepost
   startswitht   starttagopent   matcht   parse_starttagt   parse_endtagt   parse_commentt   parse_pit   parse_html_declarationt   findt   charreft   groupt   handle_charreft   endt	   entityreft   handle_entityreft
   incompleteR!   R   (
   R   R?   R   t   it   nR5   t   jR3   t   kt   name(    (    s    /usr/lib/python2.7/HTMLParser.pyR      s     		  	
c         C   sì   |  j  } | | | d !d k r0 |  j d ƒ n  | | | d !d k rT |  j | ƒ S| | | d !d k rx |  j | ƒ S| | | d !j ƒ  d	 k rÛ | j d
 | d ƒ } | d k r» d S|  j | | d | !ƒ | d S|  j | ƒ Sd  S(   Ni   s   <!s+   unexpected call to parse_html_declaration()i   s   <!--i   s   <![i	   s	   <!doctypeR    iÿÿÿÿi   (   R   R!   R8   t   parse_marked_sectionR$   R;   t   handle_declt   parse_bogus_comment(   R   RC   R   t   gtpos(    (    s    /usr/lib/python2.7/HTMLParser.pyR:   í   s    	i   c         C   s   |  j  } | | | d !d k r0 |  j d ƒ n  | j d | d ƒ } | d k rV d S| rw |  j | | d | !ƒ n  | d S(	   Ni   s   <!s   </s"   unexpected call to parse_comment()R    iÿÿÿÿi   (   s   <!s   </(   R   R!   R;   t   handle_comment(   R   RC   t   reportR   t   pos(    (    s    /usr/lib/python2.7/HTMLParser.pyRJ     s    	c         C   s€   |  j  } | | | d !d k s, t d ƒ ‚ t j | | d ƒ } | sL d S| j ƒ  } |  j | | d | !ƒ | j ƒ  } | S(   Ni   s   <?s   unexpected call to parse_pi()iÿÿÿÿ(   R   R   t   picloseR/   R0   t	   handle_piR?   (   R   RC   R   R5   RE   (    (    s    /usr/lib/python2.7/HTMLParser.pyR9     s    	#c         C   s€  d  |  _ |  j | ƒ } | d k  r( | S|  j } | | | !|  _ g  } t j | | d ƒ } | so t d ƒ ‚ | j ƒ  } | j d ƒ j	 ƒ  |  _
 } xî | | k  r‡t j | | ƒ } | sÂ Pn  | j d d d ƒ \ }	 }
 } |
 sï d  } nX | d  d k o| d k n s7| d  d k o2| d k n rG| d d !} n  | r_|  j | ƒ } n  | j |	 j	 ƒ  | f ƒ | j ƒ  } qš W| | | !j ƒ  } | d k r+|  j ƒ  \ } } d |  j k r | |  j j d ƒ } t |  j ƒ |  j j d ƒ } n | t |  j ƒ } |  j | | | !ƒ | S| j d
 ƒ rM|  j | | ƒ n/ |  j | | ƒ | |  j k r||  j | ƒ n  | S(   Ni    i   s#   unexpected call to parse_starttag()i   i   s   'iÿÿÿÿt   "R    s   />s   
(   R    s   />(   R	   R"   t   check_for_whole_start_tagR   t   tagfindR5   R   R?   R=   R$   R   t   attrfindt   unescapet   appendt   stripR   t   countR.   t   rfindR1   t   endswitht   handle_startendtagt   handle_starttagt   CDATA_CONTENT_ELEMENTSR)   (   R   RC   t   endposR   t   attrsR5   RF   t   tagt   mt   attrnamet   restt	   attrvalueR?   R   R   (    (    s    /usr/lib/python2.7/HTMLParser.pyR6     sR    			$$c         C   sý   |  j  } t j | | ƒ } | rí | j ƒ  } | | | d !} | d k rR | d S| d k r² | j d | ƒ rx | d S| j d | ƒ rŽ d S|  j | | d ƒ |  j d ƒ n  | d k rÂ d S| d	 k rÒ d S| | k râ | S| d Sn  t d
 ƒ ‚ d  S(   Ni   R    t   /s   />i   iÿÿÿÿs   malformed empty start tagR   s6   abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZs   we should not get here!(   R   t   locatestarttagendR5   R?   R3   R2   R!   R   (   R   RC   R   Ra   RE   t   next(    (    s    /usr/lib/python2.7/HTMLParser.pyRR   N  s,    	c         C   s  |  j  } | | | d !d k s, t d ƒ ‚ t j | | d ƒ } | sL d S| j ƒ  } t j | | ƒ } | s$|  j d  k	 r— |  j	 | | | !ƒ | St
 j | | d ƒ } | sâ | | | d !d k rÒ | d S|  j | ƒ Sn  | j d ƒ j ƒ  } | j d | j ƒ  ƒ } |  j | ƒ | d S| j d ƒ j ƒ  } |  j d  k	 rr| |  j k rr|  j	 | | | !ƒ | Sn  |  j | ƒ |  j ƒ  | S(	   Ni   s   </s   unexpected call to parse_endtagi   iÿÿÿÿi   s   </>R    (   R   R   t	   endendtagR/   R?   t
   endtagfindR5   R   R	   R1   RS   RJ   R=   R$   R;   t   handle_endtagR*   (   R   RC   R   R5   RK   t	   namematcht   tagnameR(   (    (    s    /usr/lib/python2.7/HTMLParser.pyR7   n  s8    	#
c         C   s!   |  j  | | ƒ |  j | ƒ d  S(   N(   R\   Rj   (   R   R`   R_   (    (    s    /usr/lib/python2.7/HTMLParser.pyR[   –  s    c         C   s   d  S(   N(    (   R   R`   R_   (    (    s    /usr/lib/python2.7/HTMLParser.pyR\   ›  s    c         C   s   d  S(   N(    (   R   R`   (    (    s    /usr/lib/python2.7/HTMLParser.pyRj   Ÿ  s    c         C   s   d  S(   N(    (   R   RG   (    (    s    /usr/lib/python2.7/HTMLParser.pyR>   £  s    c         C   s   d  S(   N(    (   R   RG   (    (    s    /usr/lib/python2.7/HTMLParser.pyRA   §  s    c         C   s   d  S(   N(    (   R   R   (    (    s    /usr/lib/python2.7/HTMLParser.pyR1   «  s    c         C   s   d  S(   N(    (   R   R   (    (    s    /usr/lib/python2.7/HTMLParser.pyRL   ¯  s    c         C   s   d  S(   N(    (   R   t   decl(    (    s    /usr/lib/python2.7/HTMLParser.pyRI   ³  s    c         C   s   d  S(   N(    (   R   R   (    (    s    /usr/lib/python2.7/HTMLParser.pyRP   ·  s    c         C   s   d  S(   N(    (   R   R   (    (    s    /usr/lib/python2.7/HTMLParser.pyt   unknown_declº  s    c            s2   d | k r | S‡  f d †  } t  j d | | ƒ S(   NR-   c            s  |  j  ƒ  d }  yZ |  d d k ri |  d }  |  d d k rS t |  d d ƒ } n t |  ƒ } t | ƒ SWn t k
 r† d |  d SXt j d  k rë d	 d  l } i d
 d 6} x- | j j	 ƒ  D] \ } } t | ƒ | | <q¿ W| t _ n  y ˆ  j |  SWn t
 k
 rd |  d SXd  S(   Ni    t   #i   t   xt   Xi   s   &#R,   iÿÿÿÿu   't   aposR-   (   Rp   Rq   (   t   groupst   intt   unichrt
   ValueErrorR   t
   entitydefsR	   t   htmlentitydefst   name2codepointt	   iteritemst   KeyError(   t   st   cRx   Rw   RF   t   v(   R   (    s    /usr/lib/python2.7/HTMLParser.pyt   replaceEntitiesÂ  s(    
s#   &(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));(   R%   t   sub(   R   R|   R   (    (   R   s    /usr/lib/python2.7/HTMLParser.pyRU   ¿  s    (   R   R   N(!   R   R   R   R]   R   R   R   R   R!   R	   R"   R#   R)   R*   R   R:   RJ   R9   R6   RR   R7   R[   R\   Rj   R>   RA   R1   RL   RI   RP   Rn   Rw   RU   (    (    (    s    /usr/lib/python2.7/HTMLParser.pyR   K   s<   										^			4	 	(										(   R   R   R%   R&   R   RB   R@   R<   R4   RO   t   commentcloseRS   t   tagfind_tolerantRT   t   VERBOSERf   Rh   Ri   t	   ExceptionR   R   R   (    (    (    s    /usr/lib/python2.7/HTMLParser.pyt   <module>   s&   
	