|
@@ -17,29 +17,42 @@ env = Environment(
|
|
|
# Filter / delete the old files here
|
|
|
|
|
|
import os
|
|
|
-import time
|
|
|
+import argparse
|
|
|
|
|
|
-now = time.time()
|
|
|
+parser = argparse.ArgumentParser()
|
|
|
+parser.add_argument("-e", "--expire", type=int, help="Expire files after this many days.", default=7)
|
|
|
+parser.add_argument("-f", "--fresh", help="Download fresh copy of RSS feed.", action="store_true")
|
|
|
+parser.add_argument("--hours", type=int, help="Number of hours (range) we consider to be new.", default=12)
|
|
|
+parser.add_argument("-v", "--verbose", help="Display more verbose information.", action="store_true")
|
|
|
+args = parser.parse_args()
|
|
|
+
|
|
|
+now = pendulum.now()
|
|
|
|
|
|
for f in os.listdir():
|
|
|
if f.endswith('.html') or f.endswith('.text'):
|
|
|
- created = os.path.getctime(f)
|
|
|
- if (now - created) // (24 * 3600) >= 7:
|
|
|
+ created = pendulum.from_timestamp(os.path.getctime(f), tz='local')
|
|
|
+ age = now-created
|
|
|
+ if age.in_days() >= args.expire:
|
|
|
# Older than the expiry threshold (7 days by default)
|
|
|
os.unlink(f)
|
|
|
print("Removed {}".format(f))
|
|
|
-
|
|
|
+ else:
|
|
|
+ if args.verbose:
|
|
|
+ print("{0} is {1} day(s) old.".format(f, age.in_days()))
|
|
|
|
|
|
|
|
|
url = 'https://usn.ubuntu.com/rss.xml'
|
|
|
|
|
|
-data = feedparser.parse(url)
|
|
|
-# pprint(data)
|
|
|
-# Delete this key, so we can save as json.
|
|
|
-del( data['bozo_exception'])
|
|
|
+if args.fresh:
|
|
|
+ data = feedparser.parse(url)
|
|
|
+ del( data['bozo_exception'])
|
|
|
|
|
|
-with open('rss.json', 'w') as fp:
|
|
|
- json.dump(data, fp)
|
|
|
+ with open('rss.json', 'w') as fp:
|
|
|
+ json.dump(data, fp)
|
|
|
+else:
|
|
|
+ print("Loading stale data.")
|
|
|
+ with open('rss.json') as fp:
|
|
|
+ data = json.load(fp)
|
|
|
|
|
|
# For working "offline"
|
|
|
# with open('rss.json') as fp:
|
|
@@ -52,8 +65,21 @@ output = { 'total': 0, 'entries': list() }
|
|
|
for entry in data['entries']:
|
|
|
# when = pendulum.parse(entry['published'])
|
|
|
# Example 'published' value: Mon, 18 Nov 2019 12:42:01 +0000
|
|
|
- # when = pendulum.from_format(entry['published'], "ddd, D MMM YYYY HH:mm:ss ZZ")
|
|
|
+ when = pendulum.from_format(entry['published'], "ddd, D MMM YYYY HH:mm:ss ZZ")
|
|
|
title = entry['title']
|
|
|
+ age = now - when
|
|
|
+ if args.verbose:
|
|
|
+ print("Age (in days):", age.in_days())
|
|
|
+
|
|
|
+ if age.in_hours() > args.hours:
|
|
|
+ # Skip over this old record!
|
|
|
+ if args.verbose:
|
|
|
+ print("Skipping {0} : {1} hours old. {2}".format(title, age.in_hours(), when))
|
|
|
+ continue
|
|
|
+
|
|
|
+ print("Age in hours:", age.in_hours())
|
|
|
+
|
|
|
+ print("New record:", when.to_datetime_string(), "Title:", title)
|
|
|
# print(when.to_datetime_string(), title)
|
|
|
|
|
|
filename = "{0}.html".format(title)
|