#!/usr/bin/python
# Automatically download "Il Fatto Quotidiano", authenticating to the PDF archive
#
# Copyright: (C) 2009-2010 Stefano Zacchiroli <zack@pps.jussieu.fr>
# License:   GNU General Public License 3 or above
#
# Last-Modified: Tue, 27 Jul 2010 08:36:00 +0200

import datetime, mechanize, re, sys

# Account credentials submitted to the login form by main(). The values below
# look like placeholders and are presumably meant to be replaced with a real
# subscriber e-mail address and password before running the script.
USERNAME = "quilatua@email"
PASSWORD = "quilatuapassword"
# Login page; its redirect_to query parameter points at the /abbonati/ page.
LOGIN_URL = 'http://www.ilfattoquotidiano.it/login/?redirect_to=http://www.ilfattoquotidiano.it/abbonati/'

def _filename_from_content_disposition(cdisp):
    """Extract a safe local file name from a Content-Disposition header value.

    Sample header value: ``attachment; filename=ilfatto20091008.pdf``

    This is a deliberately simple parser: parameters are split on ``;`` and
    the first ``filename=`` parameter wins.

    Returns the bare file name with any directory components removed, or
    None when the header is missing, is not an ``attachment`` disposition,
    or carries no usable ``filename`` parameter.
    """
    if not cdisp:
        return None
    parts = [part.strip() for part in cdisp.split(';')]
    if parts[0].lower() != 'attachment':
        return None
    for part in parts[1:]:
        key, sep, value = part.partition('=')
        if sep and key.strip().lower() == 'filename':
            name = value.strip('" \t\r\n')  # remove surrounding garbage
            # The header is controlled by the server: keep only the last
            # path component so the download cannot be written outside the
            # current directory.
            name = name.replace('\\', '/').rsplit('/', 1)[-1]
            if name in ('', '.', '..'):
                return None
            return name
    return None


def main():
    """Log in to the PDF archive and save today's issue to a local file.

    Opens LOGIN_URL, submits the login form with USERNAME and PASSWORD,
    submits the second form on the resulting page, then downloads the PDF
    for the current local date. The file is saved in the current working
    directory under the name announced by the server's Content-Disposition
    header, falling back to ``ilfatto.pdf`` (with a warning on stderr) when
    no usable name is available. The name of the saved file is printed on
    stdout.
    """
    br = mechanize.Browser()
    br.open(LOGIN_URL)

    # stage 1: login into PDF archive
    br.select_form('loginform')
    br['log'] = USERNAME
    br['pwd'] = PASSWORD
    br.submit()

    # stage 2: set (today's) date
    br.select_form(nr=1)
    br.submit()

    # stage 3: retrieve PDF
    # The download URL is built directly from today's date instead of
    # following the "openpdf" link found on the page.
    now = datetime.datetime.now()
    pdf_res = br.open('http://www.ilfattoquotidiano.it/openpdf/?n=%s'
                      % now.strftime('%Y%m%d'))

    # stage 4: save PDF
    # Use the public info() accessor and .get() so that a missing header
    # leads to the fallback name instead of a KeyError.
    fname = _filename_from_content_disposition(
        pdf_res.info().get('content-disposition'))
    if fname is None:
        sys.stderr.write("W: can't guess filename, saving to ilfatto.pdf\n")
        fname = 'ilfatto.pdf'
    # Binary mode: a PDF must be written byte-for-byte, without any newline
    # translation; the with-block closes the file even if the write fails.
    with open(fname, 'wb') as pdf_file:
        pdf_file.write(pdf_res.get_data())

    sys.stdout.write(fname + '\n')

# Run the download only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

