21 lines
690 B
Python
21 lines
690 B
Python
import requests
|
|
from lxml import etree as ET
|
|
|
|
def download_feed(s, url, file, remove_premium=False, fulltext=False, timeout=30):
    """Download an XML feed, optionally strip premium items, and save it.

    Args:
        s: Session-like object; ``s.get(url, timeout=...)`` must return a
            response exposing ``raise_for_status()`` and ``content``
            (a ``requests.Session`` satisfies this).
        url: Address of the feed to fetch.
        file: Destination handed unchanged to ``ElementTree.write``.
        remove_premium: When true, every ``item`` element whose ``title``
            text contains ``"[$]"`` is removed before writing.
        fulltext: Currently unused; reserved for the full-text TODO below.
        timeout: Seconds to wait on the server before the request gives up.

    Raises:
        Whatever ``raise_for_status()`` raises for an unsuccessful response
        (``requests.HTTPError`` for a requests session), and lxml's parse
        error if the body is not well-formed XML.
    """
    r = s.get(url, timeout=timeout)
    # Fail loudly on an error response instead of parsing/saving an error page.
    r.raise_for_status()

    # Parse the raw bytes: lxml rejects decoded ``str`` input that carries an
    # XML encoding declaration, and bytes let the parser honour that
    # declaration itself.
    root = ET.fromstring(r.content)
    tree = ET.ElementTree(root)

    if remove_premium:
        # Snapshot the matches first so the traversal never depends on a tree
        # that is being mutated underneath it.
        for post in list(tree.iter('item')):
            # findtext() tolerates a missing or empty <title>.
            title = post.findtext('title') or ''
            if "[$]" not in title:
                continue
            # Detach from the actual parent rather than assuming the item
            # sits directly under the root's first child.
            parent = post.getparent()
            if parent is not None:
                parent.remove(post)

    ## TODO: full-text parsing

    tree.write(file)
|
|
|
|
# One HTTP session shared by every download below; its User-Agent header is
# set once here and sent with each request.
s = requests.Session()
s.headers['User-Agent'] = 'FreshRSS/1.23.1 (Linux; https://freshrss.org)'

# (feed URL, output file) pairs; each feed is saved with premium items removed.
for feed_url, out_name in (
    ("https://lwn.net/headlines/Features", "lwn-features.xml"),
    ("https://lwn.net/headlines/rss", "lwn-all.xml"),
):
    download_feed(s, feed_url, out_name, remove_premium=True)
|
|
|