#!/usr/bin/env python
import json
import os.path
import sys
from pprint import pprint
from subprocess import check_output

import feedparser
import pendulum
from jinja2 import Environment, FileSystemLoader, select_autoescape
# Jinja2 environment used to render the final listing. Templates are looked
# up in "templates/" relative to the current working directory, and the
# auto-escape policy is limited to the 'html' and 'xml' extensions.
template_loader = FileSystemLoader('templates/')
escape_policy = select_autoescape(['html', 'xml'])
env = Environment(loader=template_loader, autoescape=escape_policy)
# Command-line interface. `args` is consulted by the rest of the script for
# the expiry age, the "new" window, the fresh/stale switch and verbosity.
import os
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "-e", "--expire",
    type=int,
    default=7,
    help="Expire files after this many days.",
)
parser.add_argument(
    "-f", "--fresh",
    action="store_true",
    help="Download fresh copy of RSS feed.",
)
parser.add_argument(
    "-n", "--new",
    type=int,
    default=2,
    help="Number of Days (range) we consider to be new.",
)
parser.add_argument(
    "-v", "--verbose",
    action="store_true",
    help="Display more verbose information.",
)
args = parser.parse_args()
# Housekeeping: remove previously generated .html/.text files from the
# current directory once they are at least --expire days old.
# `now` is also the reference point for the feed-entry age check further down.
now = pendulum.now()
for entry_name in os.listdir():
    if not entry_name.endswith(('.html', '.text')):
        continue
    # NOTE: getctime() is the inode-change time on Unix, not a true creation
    # time; it is used here as the file's "created" stamp.
    stamped = pendulum.from_timestamp(os.path.getctime(entry_name), tz='local')
    days_old = (now - stamped).in_days()
    if days_old >= args.expire:
        print("Removing old file {0} ({1} >= {2}).".format(entry_name, days_old, args.expire))
        os.unlink(entry_name)
    elif args.verbose:
        print("Keep {0} is {1} day(s) old.".format(entry_name, days_old))
# Obtain the feed data: either download it now (--fresh) and cache the parsed
# result in rss.json, or reuse the rss.json written by an earlier fresh run
# (handy for working offline).
url = 'https://usn.ubuntu.com/rss.xml'
if args.fresh:
    data = feedparser.parse(url)
    # The exception object cannot be written as JSON, so it is dropped before
    # caching. feedparser only sets this key when it flagged a problem, hence
    # pop() with a default: a plain `del` raised KeyError on a clean parse.
    data.pop('bozo_exception', None)
    with open('rss.json', 'w') as fp:
        json.dump(data, fp)
else:
    print("Loading stale data.")
    try:
        with open('rss.json') as fp:
            data = json.load(fp)
    except FileNotFoundError:
        # Give the operator something actionable instead of a traceback.
        sys.exit("No cached rss.json found; run once with --fresh to download the feed.")
# Debug aid: pprint(data) shows the raw structure of the parsed feed.
# Walk the feed entries. Every entry published within the last --new days
# that has no .html file in the working directory yet is saved as HTML,
# converted to plain text with elinks, and queued in `output` for the
# template. `output` ends up as {'total': <count>, 'entries': [{...}, ...]}.
output = {'total': 0, 'entries': []}
for entry in data['entries']:
    # Sample value: "Mon, 18 Nov 2019 12:42:01 +0000"
    when = pendulum.from_format(entry['published'], "ddd, D MMM YYYY HH:mm:ss ZZ")
    title = entry['title']
    age = now - when
    if age.in_days() > args.new:
        # Too old to be considered new; skip it.
        if args.verbose:
            print("Skipping {0} : {1} days old.".format(title, age.in_days()))
        continue

    print("Possible:", when.to_datetime_string(), "Title:", title)

    # The title comes from the remote feed, so it must not be able to supply
    # a path separator: that would make open() fail or write outside the
    # working directory.
    safe_title = title.replace(os.sep, '_')
    if os.altsep:
        safe_title = safe_title.replace(os.altsep, '_')
    filename = "{0}.html".format(safe_title)
    textname = "{0}.text".format(safe_title).replace(':', '').replace(' ', '_')

    # The presence of the .html file is the marker for "already processed".
    if not os.path.exists(filename):
        print("Missing/TO Display:", entry['published'], title)
        with open(filename, "wb") as fp:
            fp.write(entry['summary'].encode('utf-8'))

        # Convert the HTML summary to 72-column plain text, equivalent to:
        #   elinks -dump 0 -dump-width 72 --no-references --no-numbering ./1.html
        try:
            text = check_output(
                ["elinks", "-dump", "0", "-dump-width", "72",
                 "--no-references", "--no-numbering", "./" + filename],
                universal_newlines=True,
                shell=False,
            )
        except BaseException:
            # Conversion failed or was interrupted: remove the marker file,
            # otherwise the next run would treat this notice as already
            # handled and it would never be listed.
            os.unlink(filename)
            raise
        with open(textname, "w") as fp:
            fp.write(text)

        # NOTE(review): the original indentation was lost; the bookkeeping
        # below is assumed to belong to this "newly seen" branch - confirm.
        # Everything after the first ':' is kept as the display title (it
        # retains any whitespace that followed the colon).
        _, _, short_title = title.partition(':')
        output['entries'].append({"filename": textname, "title": short_title})
        output['total'] += 1
# Write the listing only when this run queued at least one notice.
if output['total'] > 0:
    template = env.get_template('ubuntu.ini')
    # Render before opening the destination: opening with 'w' truncates the
    # file, so a template error would otherwise leave ../ubuntu.ini empty.
    rendered = template.render(output)
    with open('../ubuntu.ini', 'w') as fp:
        fp.write(rendered)