Changeset 220

Show
Ignore:
Timestamp:
03/13/08 18:50:57 (8 months ago)
Author:
haypo
Message:

Fix the parser for user-agent strings that contain an escaped double quote (\")

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • apache_log/apache_log/parser.py

    r139 r220  
    5454        else: 
    5555            self.code = None 
    56 #        if 'user_agent' in data: 
    57 #            self.user_agent = data['user_agent'] 
     56        if 'user_agent' in data: 
     57            self.user_agent = self.unquote(data['user_agent']) 
    5858#            tag = userAgentTag(parser.error, self.user_agent) 
    5959#            if tag: 
    6060#                self.tag = tag 
    61 #        else: 
    62 #            self.user_agent = None 
     61        else: 
     62            self.user_agent = None 
    6363 
    6464    def unquote(self, url): 
     65        url = url.replace(r'\"', '"') 
    6566        url = unquote(url) 
    6667        if url != "-": 
     
    7071 
    7172class ApacheLogParser: 
     73    _REGEX_STRING = r'(?:[^\"]|\")*' 
     74 
    7275    # Referrer URL -> referrer="http://..." 
    73     REGEX_REFERRER = r"\"(?P<referrer>(?:[^\"]|\\\")*)\"" 
     76    REGEX_REFERRER = r'"(?P<referrer>%s)"' % _REGEX_STRING 
    7477 
    7578    # User agent: "Mozilla ..." -> user_agent="Mozilla ..." 
    76     REGEX_USER_AGENT = r"\"(?P<user_agent>[^\"]*)\"" 
     79    REGEX_USER_AGENT = r'"(?P<user_agent>%s)"' % _REGEX_STRING 
    7780 
    7881    # HTTP request: 
     
    8790    ) 
    8891    METHODS = HTTP_METHODS + WEBDAV_METHODS 
    89     REGEX_REQUEST = r"\"(?P<method>%s) (?P<url>[^ ]+)(?: HTTP/(?P<http_version>1\.[01]))?\"" % '|'.join(METHODS) 
     92    REGEX_REQUEST = r'"(?P<method>%s) (?P<url>[^ ]+)(?: HTTP/(?P<http_version>1\.[01]))?"' % '|'.join(METHODS) 
    9093 
    9194    # Date -> date="..."