فهرست منبع

The actual Python script that does the work.

* Loads the RSS feed and parses it.  (Feedparser!)
* Stores the feed as rss.json.  (because we can)
* Checks to see if a {Title}.html file exists.
* If not, there's something to do.
*   Dump summary out as html file.
*   Use elinks to convert to text file.
*   Save info into output dictionary.
* Have anything?  If so, use the jinja2 template to create
*   the ubuntu.ini file.
Steve Thielemann 5 سال پیش
والد
کامیت
4dc694b17b
1 فایلهای تغییر یافته به همراه 84 افزوده شده و 0 حذف شده
  1. 84 0
      ubuntu_load.py

+ 84 - 0
ubuntu_load.py

@@ -0,0 +1,84 @@
+#!/usr/bin/env python
+
+import json
+import pendulum
+from pprint import pprint
+import os.path
+from subprocess import check_output
+import feedparser
+
+from jinja2 import Environment, FileSystemLoader, select_autoescape
+
+# Jinja2 environment used to render the ubuntu.ini template at the end of
+# the script. Templates are looked up in templates/, and select_autoescape
+# is configured for the 'html' and 'xml' extensions.
+env = Environment(
+    autoescape=select_autoescape(['html', 'xml']),
+    loader=FileSystemLoader('templates/'),
+)
+
+# Filter / delete the old files here
+
+import os
+import time
+
+# Housekeeping: remove generated .html / .text files from the current
+# working directory once they are at least seven whole days old.
+now = time.time()
+
+for f in os.listdir():
+    if not f.endswith(('.html', '.text')):
+        continue
+    created = os.path.getctime(f)
+    age_days = (now - created) // (24 * 3600)
+    if age_days < 7:
+        continue
+    # Older than 7 days
+    os.unlink(f)
+    print("Removed {}".format(f))
+
+
+
+url = 'https://usn.ubuntu.com/rss.xml'
+
+# Download and parse the feed.
+data = feedparser.parse(url)
+# pprint(data)
+# The exception object stored under 'bozo_exception' cannot be saved as
+# json, so drop it. feedparser only sets this key when it ran into a
+# problem while parsing; pop() with a default avoids a KeyError when the
+# feed parsed cleanly.
+data.pop('bozo_exception', None)
+
+# Keep a copy of the parsed feed on disk so it can be reloaded later.
+with open('rss.json', 'w') as fp:
+    json.dump(data, fp)
+
+# For working "offline"
+# with open('rss.json') as fp:
+#     data = json.load(fp)
+
+# pprint(data)
+
+# Results handed to the template: a running count plus one record
+# (text file name and short title) per newly seen entry.
+output = {'total': 0, 'entries': list()}
+
+for entry in data['entries']:
+    title = entry['title']
+
+    # The {title}.html file doubles as the "already processed" marker.
+    filename = "{0}.html".format(title)
+    # The text file name has colons removed and spaces turned into '_'.
+    textname = "{0}.text".format(title)
+    textname = textname.replace(':', '').replace(' ', '_')
+
+    if not os.path.exists(filename):
+        # Log the feed's raw 'published' string
+        # (e.g. "Mon, 18 Nov 2019 12:42:01 +0000"). No parsed date object
+        # is built in this script, so there is nothing else to format.
+        print("Missing:", entry.get('published', ''), title)
+
+        # Dump the summary out as an html file.
+        with open(filename, "wb") as fp:
+            fp.write(entry['summary'].encode('utf-8'))
+
+        # Ok, convert into text
+        #  elinks -dump 0 -dump-width 72 --no-references --no-numbering ./1.html
+        text = check_output(
+            [
+                "elinks", "-dump", "0", "-dump-width", "72",
+                "--no-references", "--no-numbering", "./" + filename,
+            ],
+            universal_newlines=True,
+            shell=False,
+        )
+        with open(textname, "w") as fp:
+            fp.write(text)
+
+        # Everything after the first ':' in the title is the short title;
+        # the part before it is not used.
+        short_title = title.partition(':')[2]
+        output['entries'].append({"filename": textname, "title": short_title})
+        output['total'] += 1
+        # print(text)
+
+    # print(entry['summary'])
+
+# Only (re)write ../ubuntu.ini when this run produced at least one new entry.
+if output['total'] > 0:
+    template = env.get_template('ubuntu.ini')
+    # Render before opening the target: open(..., 'w') truncates the file
+    # immediately, so rendering first leaves an existing ubuntu.ini intact
+    # if the template raises.
+    rendered = template.render(output)
+    with open('../ubuntu.ini', 'w') as fp:
+        fp.write(rendered)
+
+