""" robotparser.py

    Copyright (C) 2000  Bastian Kleineidam

    You can choose between two licenses when using this package:
    1) GNU GPLv2
    2) PSF license for Python 2.2

    The robots.txt Exclusion Protocol is implemented as specified in
    http://www.robotstxt.org/norobots-rfc.txt
"""
import urlparse
import urllib

__all__ = ["RobotFileParser"]


class RobotFileParser:
    """ This class provides a set of methods to read, parse and answer
    questions about a single robots.txt file.

    """

    def __init__(self, url=''):
        self.entries = []
        self.default_entry = None
        self.disallow_all = False
        self.allow_all = False
        self.set_url(url)
        self.last_checked = 0

    def mtime(self):
        """Returns the time the robots.txt file was last fetched.

        This is useful for long-running web spiders that need to
        check for new robots.txt files periodically.

        """
        return self.last_checked

    def modified(self):
        """Sets the time the robots.txt file was last fetched to the
        current time.

        """
        import time
        self.last_checked = time.time()

    def set_url(self, url):
        """Sets the URL referring to a robots.txt file."""
        self.url = url
        self.host, self.path = urlparse.urlparse(url)[1:3]

    def read(self):
        """Reads the robots.txt URL and feeds it to the parser."""
        opener = URLopener()
        f = opener.open(self.url)
        lines = [line.strip() for line in f]
        f.close()
        self.errcode = opener.errcode
        if self.errcode in (401, 403):
            self.disallow_all = True
        elif self.errcode >= 400 and self.errcode < 500:
            self.allow_all = True
        elif self.errcode == 200 and lines:
            self.parse(lines)

    def _add_entry(self, entry):
        if "*" in entry.useragents:
            # the default entry is considered last; the first one wins
            if self.default_entry is None:
                self.default_entry = entry
        else:
            self.entries.append(entry)

    def parse(self, lines):
        """parse the input lines from a robots.txt file.
           We allow that a user-agent: line is not preceded by
           one or more blank lines."""
        # states:
        #   0: start state
        #   1: saw user-agent line
        #   2: saw an allow or disallow line
        state = 0
        linenumber = 0
        entry = Entry()

        self.modified()
        for line in lines:
            linenumber += 1
            if not line:
                if state == 1:
                    entry = Entry()
                    state = 0
                elif state == 2:
                    self._add_entry(entry)
                    entry = Entry()
                    state = 0
            # remove optional comment and strip line
            i = line.find('#')
            if i >= 0:
                line = line[:i]
            line = line.strip()
            if not line:
                continue
            line = line.split(':', 1)
            if len(line) == 2:
                line[0] = line[0].strip().lower()
                line[1] = urllib.unquote(line[1].strip())
                if line[0] == "user-agent":
                    if state == 2:
                        self._add_entry(entry)
                        entry = Entry()
                    entry.useragents.append(line[1])
                    state = 1
                elif line[0] == "disallow":
                    if state != 0:
                        entry.rulelines.append(RuleLine(line[1], False))
                        state = 2
                elif line[0] == "allow":
                    if state != 0:
                        entry.rulelines.append(RuleLine(line[1], True))
                        state = 2
        if state == 2:
            self._add_entry(entry)

    def can_fetch(self, useragent, url):
        """using the parsed robots.txt decide if useragent can fetch url"""
        if self.disallow_all:
            return False
        if self.allow_all:
            return True
        # Until the robots.txt file has been read, assume no url is fetchable.
        if not self.last_checked:
            return False
        # search for given user agent matches; the first match counts
        parsed_url = urlparse.urlparse(urllib.unquote(url))
        url = urlparse.urlunparse(('', '', parsed_url.path,
            parsed_url.params, parsed_url.query, parsed_url.fragment))
        url = urllib.quote(url)
        if not url:
            url = "/"
        for entry in self.entries:
            if entry.applies_to(useragent):
                return entry.allowance(url)
        # try the default entry last
        if self.default_entry:
            return self.default_entry.allowance(url)
        # agent not found ==> access granted
        return True

    def __str__(self):
        entries = self.entries
        if self.default_entry is not None:
            entries = entries + [self.default_entry]
        return '\n'.join(map(str, entries)) + '\n'


class RuleLine:
    """A rule line is a single "Allow:" (allowance==True) or "Disallow:"
       (allowance==False) followed by a path."""

    def __init__(self, path, allowance):
        if path == '' and not allowance:
            # an empty value means allow all
            allowance = True
        path = urlparse.urlunparse(urlparse.urlparse(path))
        self.path = urllib.quote(path)
        self.allowance = allowance

    def applies_to(self, filename):
        return self.path == "*" or filename.startswith(self.path)

    def __str__(self):
        return (self.allowance and "Allow" or "Disallow") + ": " + self.path


class Entry:
    """An entry has one or more user-agents and zero or more rulelines"""

    def __init__(self):
        self.useragents = []
        self.rulelines = []

    def __str__(self):
        ret = []
        for agent in self.useragents:
            ret.extend(["User-agent: ", agent, "\n"])
        for line in self.rulelines:
            ret.extend([str(line), "\n"])
        return ''.join(ret)

    def applies_to(self, useragent):
        """check if this entry applies to the specified agent"""
        # split the name token and make it lower case
        useragent = useragent.split("/")[0].lower()
        for agent in self.useragents:
            if agent == '*':
                # we have the catch-all agent
                return True
            agent = agent.lower()
            if agent in useragent:
                return True
        return False

    def allowance(self, filename):
        """Preconditions:
        - our agent applies to this entry
        - filename is URL decoded"""
        for line in self.rulelines:
            if line.applies_to(filename):
                return line.allowance
        return True


class URLopener(urllib.FancyURLopener):
    def __init__(self, *args):
        urllib.FancyURLopener.__init__(self, *args)
        self.errcode = 200

    def prompt_user_passwd(self, host, realm):
        # never try to open a password-protected robots.txt with credentials
        return None, None

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        self.errcode = errcode
        return urllib.FancyURLopener.http_error_default(self, url, fp,
                                                        errcode, errmsg,
                                                        headers)
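

# Illustrative usage sketch (not part of the original module): it feeds a
# tiny robots.txt body directly to parse(), so no network access is needed.
# The agent name "ExampleBot" and the example.com URLs are made up for the
# demonstration; the catch-all "*" entry becomes the default entry, and the
# first matching rule line decides each answer.
if __name__ == "__main__":
    rp = RobotFileParser()
    rp.parse([
        "User-agent: *",
        "Disallow: /private/",
        "Allow: /",
    ])
    print rp.can_fetch("ExampleBot", "http://www.example.com/private/page")  # False
    print rp.can_fetch("ExampleBot", "http://www.example.com/index.html")    # True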