فهرست منبع

Updated: Changing date parsing code. Added args.

We now REQUIRE -f, or we'll use stale data!
We are probably going to look at records within 2 days.
We are probably going to delete records > 7 days.
Steve Thielemann 5 سال پیش
والد
کامیت
ee51523f1a
1 فایلهای تغییر یافته به همراه 38 افزوده شده و 12 حذف شده
  1. 38 12
      ubuntu_load.py

+ 38 - 12
ubuntu_load.py

@@ -17,29 +17,42 @@ env = Environment(
 # Filter / delete the old files here
 
 import os
-import time
+import argparse
 
-now = time.time()
+parser = argparse.ArgumentParser()
+parser.add_argument("-e", "--expire", type=int, help="Expire files after this many days.", default=7)
+parser.add_argument("-f", "--fresh", help="Download fresh copy of RSS feed.", action="store_true")
+parser.add_argument("--hours", type=int, help="Number of hours (range) we consider to be new.", default=12)
+parser.add_argument("-v", "--verbose", help="Display more verbose information.", action="store_true")
+args = parser.parse_args()
+
+now = pendulum.now()
 
 for f in os.listdir():
     if f.endswith('.html') or f.endswith('.text'):
-        created = os.path.getctime(f)
-        if (now - created) // (24 * 3600) >= 7:
+        created = pendulum.from_timestamp(os.path.getctime(f), tz='local')
+        age = now-created
+        if age.in_days() >= args.expire:
             # Older then 7 days
             os.unlink(f)
             print("Removed {}".format(f))
-
+        else:
+            if args.verbose:
+                print("{0} is {1} day(s) old.".format(f, age.in_days()))
 
 
 url = 'https://usn.ubuntu.com/rss.xml'
 
-data = feedparser.parse(url)
-# pprint(data)
-# Delete this key, so we can save as json.
-del( data['bozo_exception'])
+if args.fresh:
+    data = feedparser.parse(url)
+    del( data['bozo_exception'])
 
-with open('rss.json', 'w') as fp:
-    json.dump(data, fp)
+    with open('rss.json', 'w') as fp:
+        json.dump(data, fp)
+else:
+    print("Loading stale data.")
+    with open('rss.json') as fp:
+        data = json.load(fp)
 
 # For working "offline"
 # with open('rss.json') as fp:
@@ -52,8 +65,21 @@ output = { 'total': 0, 'entries': list() }
 for entry in data['entries']:
     # when = pendulum.parse(entry['published'])
     # Mon, 18 Nov 2019 12:42:01 +0000
-    # when = pendulum.from_format(entry['published'], "ddd, D MMM YYYY HH:mm:ss ZZ")
+    when = pendulum.from_format(entry['published'], "ddd, D MMM YYYY HH:mm:ss ZZ")
     title = entry['title']
+    age = now - when 
+    if args.verbose:
+        print("Age (in days):", age.in_days())
+
+    if age.in_hours() > args.hours:
+        # Skip over this old record!
+        if args.verbose:
+            print("Skipping {0} : {1} hours old. {2}".format(title, age.in_hours(), when))
+        continue
+
+    print("Age in hours:", age.in_hours())
+
+    print("New record:", when.to_datetime_string(), "Title:", title)
     # print(when.to_datetime_string(), title)
 
     filename = "{0}.html".format(title)