"""
atomstream: a more efficient way to get feed updates
http://www.aaronsw.com/2002/atomstream/

    >>> import atomstream
    >>> def callback(f):
    ...     print f.feed.link
    ... 
    >>> atomstream.connect(callback)
    http://highasophia.livejournal.com/
    http://petthejesus.livejournal.com/
    http://falida.livejournal.com/
    [...and so on...]

callback receives a feed object as parsed by feedparser.
"""
__author__ = "Aaron Swartz <http://www.aaronsw.com/>"
__version__ = "1.0"
__license__ = "public domain"

import urllib, logging
from xml.sax.handler import EntityResolver, DTDHandler, ContentHandler, ErrorHandler
from xml.sax.saxutils import XMLGenerator
from xml.sax import make_parser
from StringIO import StringIO

import feedparser

class Streamer(EntityResolver, DTDHandler, ContentHandler, ErrorHandler):
    """SAX handler that cuts an atom stream into individual feeds.

    Every complete <feed> element is re-serialized to a string, parsed
    with feedparser, and the result is handed to `callback`.  The integer
    text of the most recent <time> element (a child of <atomStream>) is
    kept in `self.time`.
    """

    def __init__(self, callback):
        """Remember `callback`; it is called once per completed feed."""
        self.callback = callback
        self.time = 0
        self.reset()

    def reset(self):
        """Drop any partially parsed state.  `self.time` is preserved."""
        self.mode = None   # None, 'atomStream', 'time' or 'feed'
        self.chr = []      # text chunks of the <time> being read
        self.depth = 0     # number of open elements inside the current feed

    def startElement(self, name, attr):
        if self.mode == 'feed':
            # Inside a feed everything is copied verbatim; stream-level
            # names (including a nested <feed>) must not be interpreted.
            self.depth += 1
            self.feedster.startElement(name, attr)
            return

        if name == 'atomStream':
            self.mode = 'atomStream'

        elif self.mode == 'atomStream' and name == 'time':
            self.mode = 'time'
            self.chr = []

        elif name == 'sorryTooSlow':
            # .get() so that a missing attribute cannot abort the stream.
            logging.warning('too slow, you missed %s', attr.get('youMissed'))

        elif name == 'feed':
            self.mode = 'feed'
            self.depth = 1
            self.feeddata = StringIO()
            self.feedster = XMLGenerator(self.feeddata)
            self.feedster.startElement(name, attr)

    def characters(self, ch):
        if self.mode == 'feed':
            self.feedster.characters(ch)
        elif self.mode == 'time':
            # Only <time> text is ever read back; buffering anything else
            # (e.g. whitespace between feeds) would just pile up.
            self.chr.append(ch)

    def endElement(self, name):
        if self.mode == 'feed':
            self.feedster.endElement(name)
            self.depth -= 1
            if self.depth == 0:
                # The outermost <feed> just closed: deliver it.
                self.callback(feedparser.parse(self.feeddata.getvalue()))
                self.mode = 'atomStream'

        elif self.mode == 'time' and name == 'time':
            self.mode = 'atomStream'
            self.time = int(''.join(self.chr))
            self.chr = []

def connect(callback, feed="http://updates.sixapart.com/atom-stream.xml"):
    """Stream updates from `feed` forever, passing each one to `callback`.

    `callback` is called with the feedparser result for every complete
    <feed> element received.  When the server ends the response, a new
    request is made with `since` set to the last <time> value seen.

    This function does not return.  Errors from the network, the XML
    parser, or `callback` propagate to the caller.
    """
    s = Streamer(callback)
    p = make_parser()
    p.setContentHandler(s)
    while 1:
        # Each response is a new XML document, so start from a clean
        # handler and parser.  s.reset() keeps s.time.
        s.reset()
        p.reset()
        d = urllib.urlopen(feed+'?since='+str(s.time))
        try:
            while 1:
                line = d.readline()
                if not line:
                    # readline() returns an empty string at EOF, never None.
                    break
                p.feed(line)
        finally:
            d.close()
