ubuntu_load.py 2.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384
  1. #!/usr/bin/env python
  2. import json
  3. import pendulum
  4. from pprint import pprint
  5. import os.path
  6. from subprocess import check_output
  7. import feedparser
  8. from jinja2 import Environment, FileSystemLoader, select_autoescape
  9. env = Environment(
  10. loader=FileSystemLoader('templates/'),
  11. autoescape=select_autoescape(['html', 'xml'])
  12. )
  13. # Filter / delete the old files here
  14. import os
  15. import time
  16. now = time.time()
  17. for f in os.listdir():
  18. if f.endswith('.html') or f.endswith('.text'):
  19. created = os.path.getctime(f)
  20. if (now - created) // (24 * 3600) >= 7:
  21. # Older then 7 days
  22. os.unlink(f)
  23. print("Removed {}".format(f))
  24. url = 'https://usn.ubuntu.com/rss.xml'
  25. data = feedparser.parse(url)
  26. # pprint(data)
  27. # Delete this key, so we can save as json.
  28. del( data['bozo_exception'])
  29. with open('rss.json', 'w') as fp:
  30. json.dump(data, fp)
  31. # For working "offline"
  32. # with open('rss.json') as fp:
  33. # data = json.load(fp)
  34. # pprint(data)
  35. output = { 'total': 0, 'entries': list() }
  36. for entry in data['entries']:
  37. # when = pendulum.parse(entry['published'])
  38. # Mon, 18 Nov 2019 12:42:01 +0000
  39. # when = pendulum.from_format(entry['published'], "ddd, D MMM YYYY HH:mm:ss ZZ")
  40. title = entry['title']
  41. # print(when.to_datetime_string(), title)
  42. filename = "{0}.html".format(title)
  43. textname = "{0}.text".format(title)
  44. textname = textname.replace(':', '').replace(' ', '_')
  45. if not os.path.exists(filename):
  46. print("Missing:", when.to_datetime_string(), title)
  47. with open(filename, "wb") as fp:
  48. fp.write(entry['summary'].encode('utf-8'))
  49. # Ok, convert into text
  50. # elinks -dump 0 -dump-width 72 --no-references --no-numbering ./1.html
  51. text = check_output( ["elinks", "-dump", "0", "-dump-width", "72", "--no-references", "--no-numbering", "./" + filename], universal_newlines=True, shell=False)
  52. with open(textname, "w") as fp:
  53. fp.write(text)
  54. (usn, _, short_title) = title.partition(':')
  55. output['entries'].append({"filename": textname, "title": short_title})
  56. output['total'] = output['total'] + 1
  57. # print(text)
  58. # print(entry['summary'])
  59. if output['total'] > 0:
  60. template = env.get_template('ubuntu.ini')
  61. with open('../ubuntu.ini', 'w') as fp:
  62. fp.write(template.render(output))