lwn-rss/generate_feeds.py

21 lines
690 B
Python

import requests
from lxml import etree as ET
def download_feed(s, url, file, remove_premium=False, fulltext=False, timeout=30):
    """Download a feed, optionally strip premium items, and write it out.

    Args:
        s: HTTP session used for the request (the script passes a
            ``requests.Session``); must provide ``get(url, timeout=...)``.
        url: Address of the feed to fetch.
        file: Destination handed to the element tree's ``write`` method.
        remove_premium: When true, every ``item`` element whose ``title``
            text contains ``"[$]"`` is removed from the tree.
        fulltext: Accepted but currently unused (full-text parsing is still
            a TODO).
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the script indefinitely.

    Raises:
        Whatever ``raise_for_status()`` raises for a 4xx/5xx response, and
        the XML parser's error if the body is not well-formed XML.
    """
    r = s.get(url, timeout=timeout)
    # Fail loudly on HTTP errors instead of parsing/writing an error page.
    r.raise_for_status()

    # Parse the raw bytes: lxml refuses ``str`` input that carries an XML
    # encoding declaration, and the parser can honour the declared encoding
    # itself rather than relying on the HTTP-header guess behind ``r.text``.
    root = ET.fromstring(r.content)
    tree = ET.ElementTree(root)

    if remove_premium:
        # Snapshot the matches first so the tree is not mutated while it is
        # being traversed.
        for post in list(root.iter('item')):
            # A missing or empty <title> is treated as "not premium".
            if "[$]" not in post.findtext('title', default=''):
                continue
            # Detach from the real parent instead of assuming the item
            # lives directly under the root's first child.
            parent = post.getparent()
            if parent is not None:
                parent.remove(post)

    # TODO: full-text parsing
    tree.write(file)
# One shared HTTP session; every request is sent with a FreshRSS User-Agent.
s = requests.Session()
s.headers['User-Agent'] = 'FreshRSS/1.23.1 (Linux; https://freshrss.org)'

# Fetch each LWN feed in turn with premium-item removal enabled and save it
# to the paired local file.
for feed_url, out_path in (
    ("https://lwn.net/headlines/Features", "lwn-features.xml"),
    ("https://lwn.net/headlines/rss", "lwn-all.xml"),
):
    download_feed(s, feed_url, out_path, remove_premium=True)