ubuntu_load.py 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  1. #!/usr/bin/env python
  2. import json
  3. import pendulum
  4. from pprint import pprint
  5. import os.path
  6. from subprocess import check_output
  7. import feedparser
  8. from jinja2 import Environment, FileSystemLoader, select_autoescape
  9. env = Environment(
  10. loader=FileSystemLoader('templates/'),
  11. autoescape=select_autoescape(['html', 'xml'])
  12. )
  13. # Filter / delete the old files here
  14. import os
  15. import argparse
  16. parser = argparse.ArgumentParser()
  17. parser.add_argument("-e", "--expire", type=int, help="Expire files after this many days.", default=7)
  18. parser.add_argument("-f", "--fresh", help="Download fresh copy of RSS feed.", action="store_true")
  19. parser.add_argument("-n", "--new", type=int, help="Number of Days (range) we consider to be new.", default=2)
  20. parser.add_argument("-v", "--verbose", help="Display more verbose information.", action="store_true")
  21. parser.add_argument("--hour", type=int, help="Run every so many hours.", default=4)
  22. args = parser.parse_args()
  23. now = pendulum.now(tz='America/New_York')
  24. import sys
  25. if now.hour % args.hour != 0:
  26. if args.verbose:
  27. print("NO, NOT YET!")
  28. sys.exit(0)
  29. for f in os.listdir():
  30. if f.endswith('.html') or f.endswith('.text'):
  31. created = pendulum.from_timestamp(os.path.getctime(f), tz='local')
  32. age = now-created
  33. if age.in_days() >= args.expire:
  34. # Older then 7 days
  35. print("Removing old file {0} ({1} >= {2}).".format(f, age.in_days(), args.expire))
  36. os.unlink(f)
  37. else:
  38. if args.verbose:
  39. print("Keep {0} is {1} day(s) old.".format(f, age.in_days()))
  40. url = 'https://usn.ubuntu.com/rss.xml'
  41. if args.fresh:
  42. data = feedparser.parse(url)
  43. if 'bozo_exception' in data:
  44. del( data['bozo_exception'])
  45. with open('rss.json', 'w') as fp:
  46. json.dump(data, fp)
  47. else:
  48. print("Loading stale data.")
  49. with open('rss.json') as fp:
  50. data = json.load(fp)
  51. # For working "offline"
  52. # with open('rss.json') as fp:
  53. # data = json.load(fp)
  54. # pprint(data)
  55. output = { 'total': 0, 'entries': list() }
  56. for entry in data['entries']:
  57. # when = pendulum.parse(entry['published'])
  58. # Mon, 18 Nov 2019 12:42:01 +0000
  59. when = pendulum.from_format(entry['published'], "ddd, D MMM YYYY HH:mm:ss ZZ")
  60. title = entry['title']
  61. age = now - when
  62. if age.in_days() > args.new:
  63. # Skip over this old record!
  64. if args.verbose:
  65. # print("Skipping {0} : {1} days old. {2}".format(title, age.in_days(), when))
  66. print("Skipping {0} : {1} days old.".format(title, age.in_days()))
  67. continue
  68. print("Possible:", when.to_datetime_string(), "Title:", title)
  69. # print(when.to_datetime_string(), title)
  70. filename = "{0}.html".format(title)
  71. textname = "{0}.text".format(title)
  72. textname = textname.replace(':', '').replace(' ', '_')
  73. if not os.path.exists(filename):
  74. print("Missing/TO Display:", entry['published'], title)
  75. with open(filename, "wb") as fp:
  76. fp.write(entry['summary'].encode('utf-8'))
  77. # Ok, convert into text
  78. # elinks -dump 0 -dump-width 72 --no-references --no-numbering ./1.html
  79. text = check_output( ["elinks", "-dump", "0", "-dump-width", "72", "--no-references", "--no-numbering", "./" + filename], universal_newlines=True, shell=False)
  80. with open(textname, "w") as fp:
  81. fp.write(text)
  82. (usn, _, short_title) = title.partition(':')
  83. output['entries'].append({"filename": textname, "title": short_title})
  84. output['total'] = output['total'] + 1
  85. # print(text)
  86. # print(entry['summary'])
  87. if output['total'] > 0:
  88. template = env.get_template('ubuntu.ini')
  89. with open('../ubuntu.ini', 'w') as fp:
  90. fp.write(template.render(output))