#!/usr/bin/env python
# By Joshua E Cook, joshua at joshuacook dot com
#
# Prints the ten most frequently requested "/ongoing/When/..." entries found
# in LogFile, one "<entry>: <hits>" line each, most requested first.
#
# The pipeline is: fuzzy() cheaply extracts candidate request strings,
# count() tallies them, and actual() applies the precise regular expression
# to the (much smaller) set of unique candidates.

from __future__ import with_statement

import re

LogFile = "o10kx100.ap"

# size of the read buffer for the file returned by builtin open()
BufferLength = 0x40000

# mode passed to builtin open()
ReadMode = "rb"

# The log is read in binary mode, so the search text is a bytes literal.
# On Python 2 this is the very same object type as a plain str literal.
Fuzz = b"GET /ongoing/When"


def fuzzy(fuzz, seq):
    """Yield a counted sequence of candidate substrings found in seq.

    For every line of ``seq`` that contains ``fuzz``, yields the pair
    ``(substring, 1)`` where the substring starts at the first occurrence of
    ``fuzz`` and runs up to (not including) the first space character after
    the end of ``fuzz``.  If no space follows, the substring runs to the end
    of the line with any trailing CR/LF removed.

    ``fuzz`` and the lines of ``seq`` must be of the same string type
    (both bytes or both text).
    """
    len_fuzz = len(fuzz)
    # Pick separators of the same type as fuzz so bytes and text both work.
    if isinstance(fuzz, bytes):
        space, eol = b" ", b"\r\n"
    else:
        space, eol = u" ", u"\r\n"
    for line in seq:
        # A single find() both tests for the match and locates it.
        start = line.find(fuzz)
        if start == -1:
            continue
        end = line.find(space, start + len_fuzz)
        if end == -1:
            # No terminating space: slicing with -1 would drop the last
            # character, so take the rest of the line instead.
            yield line[start:].rstrip(eol), 1
        else:
            yield line[start:end], 1


def count(seqn):
    """Sum the number of occurrences of unique items in a counted sequence.

    ``seqn`` is an iterable of ``(item, n)`` pairs.  Returns an iterator of
    ``(item, total)`` pairs, one per distinct item.
    """
    totals = {}
    for item, n in seqn:
        totals[item] = totals.get(item, 0) + n
    # items() exists on both Python 2 and 3; iter() keeps the return value
    # a one-shot iterator, as iteritems() was.
    return iter(totals.items())


# Bytes pattern, to match the bytes produced by reading the log in "rb" mode.
Pattern = re.compile(br"^GET /ongoing/When/\d\d\dx/(\d\d\d\d/\d\d/\d\d/[^ .]+)$")


def actual(search, seqn):
    """Perform a precise regular expression search on a counted sequence.

    ``search`` is called on each item of the ``(item, n)`` pairs in ``seqn``.
    Yields ``(match.group(1), n)`` for every item that matches and omits
    unmatched items.
    """
    for item, n in seqn:
        match = search(item)
        if match:
            yield match.group(1), n


def _top_hits(lines, limit=10):
    """Return the ``limit`` most frequent (entry, hits) pairs found in lines."""
    counts = actual(Pattern.search, count(fuzzy(Fuzz, lines)))
    return sorted(counts, reverse=True, key=lambda pair: pair[1])[:limit]


def main():
    """Read LogFile and print its ten most requested entries."""
    with open(LogFile, ReadMode, BufferLength) as log:
        # The pipeline is lazy, so it must be consumed while the file is open.
        top = _top_hits(log)
    for name, hits in top:
        if not isinstance(name, str):
            # Python 3: entries are bytes; decode them for display only.
            name = name.decode("utf-8", "replace")
        print("%s: %d" % (name, hits))


if __name__ == "__main__":
    main()