# coding: utf-8
"""
atomstream: a more efficient way to get feed updates
http://www.aaronsw.com/2002/atomstream/

    >>> import atomstream
    >>> for update in atomstream.connect():
    ...     print "'%s...'" % update[:20]
    '<feed xmlns="http://...'
    '<feed xmlns="http://...'
    '<feed xmlns="http://...'
    '<feed xmlns="http://...'
    '<feed xmlns="http://...'
    '<feed xmlns="http://...'
    '<feed xmlns="http://...'
    '<feed xmlns="http://...'
    '<feed xmlns="http://...'
    '<feed xmlns="http://...'
    ^C
    >>> import feedparser
    >>> from itertools import imap
    >>> for update in imap(feedparser.parse, atomstream.connect()):
    ...     print update.entries[0].title.encode('utf8')
    Валєрій Нуґатов / З циклу "ФРІЛАНС"

    Ну и зайчик...

    Squee.
    линейная графика
    Как я боролась с павианами

    And you thought I was obsessed....
    一个比利时的 dapper 源
    Видеоподборка
    Synchronisma Promotional Video... and More

    ^C
"""
# Module metadata: author credit, release version, and license.
__author__ = "Aaron Swartz <http://www.aaronsw.com/>"
__version__ = "1.1"
__license__ = "public domain"

import urllib, logging
from xml.sax.handler import EntityResolver, DTDHandler, ContentHandler, ErrorHandler
from xml.sax.saxutils import XMLGenerator
from xml.sax import make_parser
from StringIO import StringIO

class Streamer(EntityResolver, DTDHandler, ContentHandler, ErrorHandler):
    """SAX handler that splits an atomStream document into separate feeds.

    Each complete ``<feed>`` element is re-serialized with XMLGenerator and
    appended, as a string, to ``self.results``.  The most recent ``<time>``
    value seen directly in the stream is kept in ``self.time``.
    """

    def __init__(self):
        # ``time`` deliberately lives outside reset() so it survives a
        # reset of the parsing state.
        self.time = 0
        self.reset()

    def reset(self):
        """Discard all parsing state and pending results (``time`` is kept)."""
        self.mode = None
        self.chr = []
        self.results = []
        # Nesting depth inside the feed currently being captured (0 = none).
        self.depth = 0

    def startElement(self, name, attr):
        """Handle an opening tag, switching mode or forwarding feed content."""
        if self.mode == 'feed':
            # Everything inside a feed is feed content, whatever its name;
            # tracking the depth keeps a nested element that happens to be
            # called 'feed' from restarting or prematurely ending capture.
            self.depth += 1
            self.feedster.startElement(name, attr)
            return

        if name == 'atomStream':
            self.mode = 'atomStream'

        elif self.mode == 'atomStream' and name == 'time':
            self.mode = 'time'
            self.chr = []

        elif name == 'sorryTooSlow':
            # .get() avoids a KeyError escaping from the parser callback
            # when the attribute is absent.
            logging.warning('too slow, you missed %s',
                            attr.get('youMissed', 'unknown'))

        elif name == 'feed':
            self.mode = 'feed'
            self.depth = 1
            self.feeddata = StringIO()
            self.feedster = XMLGenerator(self.feeddata)
            self.feedster.startElement(name, attr)

    def characters(self, ch):
        """Forward text to the current feed, or buffer it for ``<time>``."""
        if self.mode == 'feed':
            self.feedster.characters(ch)
        elif self.mode == 'time':
            # Only <time> text is ever read back, so text seen in any other
            # mode is not buffered.
            self.chr.append(ch)

    def endElement(self, name):
        """Handle a closing tag, finishing a ``<time>`` or a ``<feed>``."""
        if self.mode == 'feed':
            self.feedster.endElement(name)
            self.depth -= 1
            if self.depth == 0:
                # The outermost <feed> just closed: publish the document.
                self.results.append(self.feeddata.getvalue())
                self.mode = 'atomStream'

        elif self.mode == 'time' and name == 'time':
            self.mode = 'atomStream'
            text = ''.join(self.chr)
            self.chr = []
            try:
                self.time = int(text)
            except ValueError:
                # Keep the last good timestamp rather than abort the stream.
                logging.warning('ignoring malformed time value %r', text)

def connect(feed="http://updates.sixapart.com/atom-stream.xml", retry_delay=1):
    """Yield each updated feed from the stream as a serialized XML string.

    Opens ``feed`` with a ``since`` query parameter set to the last
    timestamp seen, feeds the response line by line to a SAX parser and
    yields every completed ``<feed>`` document.  When the server ends the
    connection, waits ``retry_delay`` seconds and reconnects from the last
    timestamp.  The generator never finishes on its own; any I/O or parse
    error propagates to the caller.
    """
    import time  # local import so this block needs no new file-level import

    s = Streamer()
    p = make_parser()
    p.setContentHandler(s)

    # Respect a query string already present in the feed URL.
    if '?' in feed:
        sep = '&'
    else:
        sep = '?'

    while 1:
        # Every connection delivers a new XML document, so both the parser
        # and the handler state must start fresh (s.time is preserved).
        p.reset()
        s.reset()
        d = urllib.urlopen(feed + sep + 'since=' + str(s.time))
        try:
            while 1:
                line = d.readline()
                if not line:
                    # readline() returns an empty string at EOF, never None.
                    break
                p.feed(line)
                while s.results:
                    yield s.results.pop(0)
        finally:
            # Runs on EOF, on error, and when the generator is closed.
            d.close()
        # Pause so a server that keeps closing the connection is not hammered.
        time.sleep(retry_delay)
