5 years ago · 4dc694b17b
--- a/ubuntu_load.py
+++ b/ubuntu_load.py
@@ -0,0 +1,84 @@
 
				+#!/usr/bin/env python
			
 
				+
			
 
				+import json
			
 
				+import pendulum
			
 
				+from pprint import pprint
			
 
				+import os.path
			
 
				+from subprocess import check_output
			
 
				+import feedparser
			
 
				+
			
 
				+from jinja2 import Environment, FileSystemLoader, select_autoescape
			
 
				+
			
 
				+env = Environment(
			
 
				+    loader=FileSystemLoader('templates/'),
			
 
				+    autoescape=select_autoescape(['html', 'xml'])
			
 
				+)
			
 
				+
			
 
				+# Filter / delete the old files here
			
 
				+
			
 
				+import os
			
 
				+import time
			
 
				+
			
 
				+now = time.time()
			
 
				+
			
 
				+for f in os.listdir():
			
 
				+    if f.endswith('.html') or f.endswith('.text'):
			
 
				+        created = os.path.getctime(f)
			
 
				+        if (now - created) // (24 * 3600) >= 7:
			
 
				+            # Older then 7 days
			
 
				+            os.unlink(f)
			
 
				+            print("Removed {}".format(f))
			
 
				+
			
 
				+
			
 
				+
			
 
				+url = 'https://usn.ubuntu.com/rss.xml'
			
 
				+
			
 
				+data = feedparser.parse(url)
			
 
				+# pprint(data)
			
 
				+# Delete this key, so we can save as json.
			
 
				+del( data['bozo_exception'])
			
 
				+
			
 
				+with open('rss.json', 'w') as fp:
			
 
				+    json.dump(data, fp)
			
 
				+
			
 
				+# For working "offline"
			
 
				+# with open('rss.json') as fp:
			
 
				+#     data = json.load(fp)
			
 
				+
			
 
				+# pprint(data)
			
 
				+
			
 
				+output = { 'total': 0, 'entries': list() }
			
 
				+
			
 
				+for entry in data['entries']:
			
 
				+    # when = pendulum.parse(entry['published'])
			
 
				+    # Mon, 18 Nov 2019 12:42:01 +0000
			
 
				+    # when = pendulum.from_format(entry['published'], "ddd, D MMM YYYY HH:mm:ss ZZ")
			
 
				+    title = entry['title']
			
 
				+    # print(when.to_datetime_string(), title)
			
 
				+
			
 
				+    filename = "{0}.html".format(title)
			
 
				+    textname = "{0}.text".format(title)
			
 
				+    textname = textname.replace(':', '').replace(' ', '_')
			
 
				+
			
 
				+    if not os.path.exists(filename):
			
 
				+        print("Missing:", when.to_datetime_string(), title)
			
 
				+        with open(filename, "wb") as fp:
			
 
				+            fp.write(entry['summary'].encode('utf-8'))
			
 
				+        # Ok, convert into text
			
 
				+        #  elinks -dump 0 -dump-width 72 --no-references --no-numbering ./1.html
			
 
				+        text = check_output( ["elinks", "-dump", "0", "-dump-width", "72", "--no-references", "--no-numbering", "./" + filename], universal_newlines=True, shell=False)
			
 
				+        with open(textname, "w") as fp:
			
 
				+            fp.write(text)
			
 
				+        (usn, _, short_title) = title.partition(':')
			
 
				+        output['entries'].append({"filename": textname, "title": short_title})
			
 
				+        output['total'] = output['total'] + 1
			
 
				+        # print(text)
			
 
				+
			
 
				+    # print(entry['summary'])
			
 
				+
			
 
				+if output['total'] > 0:
			
 
				+    template = env.get_template('ubuntu.ini')
			
 
				+    with open('../ubuntu.ini', 'w') as fp:
			
 
				+        fp.write(template.render(output))
			
 
				+
			
 
				+