#!/usr/bin/env python
"""Convert recent Ubuntu Security Notice (USN) feed entries into text files.

Steps performed on each run, in order:

1. Exit immediately unless the current local hour is a multiple of ``--hour``.
2. Delete ``*.html`` / ``*.text`` files in the current directory that are at
   least ``--expire`` days old.
3. Load the feed, either freshly downloaded (``--fresh``, also cached to
   ``rss.json``) or from the cached ``rss.json``.
4. For every entry published within the last ``--new`` days that has no
   ``<title>.html`` file yet, write the HTML summary and an ``elinks`` text
   dump of it.
5. If anything new was written, render ``templates/ubuntu.ini`` to
   ``../ubuntu.ini`` with the list of new text files.

All relative paths are resolved against the current working directory.
"""

import argparse
import json
import os
import os.path
import sys
from pprint import pprint  # noqa: F401  (kept for ad-hoc debugging: pprint(data))
from subprocess import check_output

import feedparser
import pendulum
from jinja2 import Environment, FileSystemLoader, select_autoescape

FEED_URL = 'https://usn.ubuntu.com/rss.xml'
CACHE_FILE = 'rss.json'
# Matches feed dates such as "Mon, 18 Nov 2019 12:42:01 +0000".
PUBLISHED_FORMAT = "ddd, D MMM YYYY HH:mm:ss ZZ"
# Suffixes of the files this script generates (and later expires).
GENERATED_SUFFIXES = ('.html', '.text')

env = Environment(
    loader=FileSystemLoader('templates/'),
    autoescape=select_autoescape(['html', 'xml'])
)


def build_parser():
    """Return the command-line argument parser for this script."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-e", "--expire", type=int,
                        help="Expire files after this many days.", default=7)
    parser.add_argument("-f", "--fresh",
                        help="Download fresh copy of RSS feed.",
                        action="store_true")
    parser.add_argument("-n", "--new", type=int,
                        help="Number of Days (range) we consider to be new.",
                        default=2)
    parser.add_argument("-v", "--verbose",
                        help="Display more verbose information.",
                        action="store_true")
    parser.add_argument("--hour", type=int,
                        help="Run every so many hours.", default=4)
    return parser


def expire_old_files(now, expire_days, verbose):
    """Delete generated files in the current directory that are too old.

    Args:
        now: pendulum datetime used as the reference point for file age.
        expire_days: files whose age in whole days is >= this are removed.
        verbose: when true, also report the files that are kept.
    """
    for name in os.listdir():
        if not name.endswith(GENERATED_SUFFIXES):
            continue
        # NOTE(review): on Unix, getctime() is the inode-change time rather
        # than the creation time -- confirm this is the intended clock.
        created = pendulum.from_timestamp(os.path.getctime(name), tz='local')
        days_old = (now - created).in_days()
        if days_old >= expire_days:
            print("Removing old file {0} ({1} >= {2}).".format(
                name, days_old, expire_days))
            os.unlink(name)
        elif verbose:
            print("Keep {0} is {1} day(s) old.".format(name, days_old))


def load_feed(fresh):
    """Return the parsed feed as a dict-like object with an 'entries' list.

    Args:
        fresh: when true, download the feed and refresh the JSON cache;
            otherwise read the previously cached copy.

    Raises:
        OSError: if the cache file cannot be read or written.
    """
    if not fresh:
        print("Loading stale data.")
        with open(CACHE_FILE) as fp:
            return json.load(fp)

    data = feedparser.parse(FEED_URL)
    # The exception object cannot be serialized by json.dump(). feedparser
    # only sets this key for ill-formed feeds, so it may be absent; a plain
    # `del` would raise KeyError on a clean parse.
    data.pop('bozo_exception', None)
    with open(CACHE_FILE, 'w') as fp:
        json.dump(data, fp)
    return data


def write_entry_files(entry, title):
    """Write the HTML and text files for one feed entry if not yet present.

    The ``.html`` file doubles as the "already processed" marker: when it
    exists the entry is skipped.

    Args:
        entry: feed entry mapping; reads 'published' and 'summary'.
        title: the entry title, used to derive both file names.

    Returns:
        The name of the text file that was written, or None when the entry
        had already been processed.

    Raises:
        Exception: whatever the conversion or the text-file write raised;
            the ``.html`` marker is removed first so a later run retries.
    """
    # A '/' in the title would make open() point into a non-existent
    # directory, so neutralize it in both names.
    base = title.replace('/', '_')
    filename = "{0}.html".format(base)
    textname = "{0}.text".format(base).replace(':', '').replace(' ', '_')

    if os.path.exists(filename):
        return None

    print("Missing/TO Display:", entry['published'], title)
    with open(filename, "wb") as fp:
        fp.write(entry['summary'].encode('utf-8'))

    try:
        # Equivalent shell command:
        #   elinks -dump 0 -dump-width 72 --no-references --no-numbering ./X.html
        # The "./" prefix keeps a name starting with '-' from being parsed
        # as an option.
        text = check_output(
            ["elinks", "-dump", "0", "-dump-width", "72",
             "--no-references", "--no-numbering", "./" + filename],
            universal_newlines=True, shell=False)
        with open(textname, "w") as fp:
            fp.write(text)
    except Exception:
        # Without this cleanup the leftover marker would make every later
        # run skip the entry, so it would never be published.
        os.unlink(filename)
        raise
    return textname


def collect_new_entries(data, now, max_age_days, verbose):
    """Process recent feed entries and describe the newly written ones.

    Args:
        data: parsed feed; only ``data['entries']`` is read.
        now: pendulum datetime used as the reference point for entry age.
        max_age_days: entries older than this many whole days are skipped.
        verbose: when true, report skipped entries.

    Returns:
        A list of ``{"filename": ..., "title": ...}`` dicts, one per entry
        whose files were written during this run.
    """
    written = []
    for entry in data['entries']:
        when = pendulum.from_format(entry['published'], PUBLISHED_FORMAT)
        title = entry['title']
        days_old = (now - when).in_days()
        if days_old > max_age_days:
            if verbose:
                print("Skipping {0} : {1} days old.".format(title, days_old))
            continue

        print("Possible:", when.to_datetime_string(), "Title:", title)
        textname = write_entry_files(entry, title)
        if textname is None:
            continue
        # Everything after the first ':' of the title (leading whitespace
        # is preserved, exactly as the template has always received it).
        short_title = title.partition(':')[2]
        written.append({"filename": textname, "title": short_title})
    return written


def main():
    """Run one pass of the script; see the module docstring for the steps."""
    parser = build_parser()
    args = parser.parse_args()
    if args.hour == 0:
        # Would otherwise surface as ZeroDivisionError in the modulo below.
        parser.error("--hour must not be 0.")

    now = pendulum.now()
    if now.hour % args.hour != 0:
        if args.verbose:
            print("NO, NOT YET!")
        sys.exit(0)

    expire_old_files(now, args.expire, args.verbose)
    data = load_feed(args.fresh)
    entries = collect_new_entries(data, now, args.new, args.verbose)

    # Only touch the output file when there is something new to publish.
    if entries:
        output = {'total': len(entries), 'entries': entries}
        template = env.get_template('ubuntu.ini')
        with open('../ubuntu.ini', 'w') as fp:
            fp.write(template.render(output))


if __name__ == "__main__":
    main()