feat: add support for proxying feed requests

Introduced optional proxy configuration for fetching RSS feeds,
including support for using Tor SOCKS5 proxy for enhanced privacy.
Added properties to manage proxy settings and updated fetch
functionality to utilize the proxy when necessary. This helps route
requests through specified proxies, supporting privacy use cases
like accessing .onion URLs.
This commit is contained in:
Kumi 2024-06-15 19:58:15 +02:00
parent 6727d8eabe
commit 08fbba0e0a
Signed by: kumi
GPG key ID: ECBCC9082395383F
2 changed files with 55 additions and 1 deletions

View file

@ -45,6 +45,17 @@ Operator = Contact details not set
#
LogLevel = info
# The URL to a proxy server to use for requests to feeds, if any
# For example, if you want to use the Tor network, you can set this to
# "socks5://127.0.0.1:9050"
#
# Proxy = socks5://yourproxy:port
# Set to 1 if the proxy should only be used for .onion addresses
# Defaults to 0
#
# ProxyOnionOnly = 1
###############################################################################
[Matrix]

View file

@ -22,11 +22,14 @@ from typing import Optional, List
from configparser import ConfigParser
from datetime import datetime
from io import BytesIO
from urllib.parse import urlparse
import uuid
import traceback
import json
import aiohttp
from aiohttp_socks import ProxyConnector
import markdown2
import feedparser
@ -108,6 +111,24 @@ class RSSBot:
"matrix-rssbot/dev (+https://git.private.coffee/PrivateCoffee/matrix-rssbot)"
)
@property
def proxy(self) -> Optional[str]:
    """URL of the proxy server configured for HTTP requests.

    Read from the ``Proxy`` option of the ``[RSSBot]`` config section.

    Returns:
        Optional[str]: The configured proxy URL, or None if the option
            is not set.
    """
    bot_section = self.config["RSSBot"]
    return bot_section.get("Proxy", fallback=None)
@property
def proxy_onion_only(self) -> bool:
    """Flag restricting proxy usage to .onion URLs.

    Read from the ``ProxyOnionOnly`` option of the ``[RSSBot]`` config
    section and interpreted as a boolean.

    Returns:
        bool: True if the proxy should be used only for .onion URLs.
            False when the option is not set.
    """
    bot_section = self.config["RSSBot"]
    onion_only = bot_section.getboolean("ProxyOnionOnly", fallback=False)
    return onion_only
@classmethod
def from_config(cls, config: ConfigParser):
"""Create a new RSSBot instance from a config file.
@ -458,6 +479,28 @@ class RSSBot:
room, "rssbot.event_type", {"event_type": event_type}
)
async def fetch_feed(self, url: str) -> feedparser.FeedParserDict:
    """Fetch and parse an RSS feed, optionally through the configured proxy.

    When ``self.proxy`` is set, the request goes through that proxy for
    every URL, unless ``self.proxy_onion_only`` is enabled, in which case
    the proxy is used only for hosts ending in ``.onion``. All other
    requests use a direct connection.

    Args:
        url (str): The URL of the RSS feed.

    Returns:
        feedparser.FeedParserDict: The parsed RSS feed.
    """
    # urlparse() reports hostname=None when the URL has no network location
    # (e.g. the scheme is missing); fall back to "" so the suffix check
    # cannot raise AttributeError. A trailing dot (fully-qualified form) is
    # stripped so such .onion hosts are still routed through the proxy.
    hostname = (urlparse(url).hostname or "").rstrip(".")
    use_proxy = bool(self.proxy) and (
        not self.proxy_onion_only or hostname.endswith(".onion")
    )

    if use_proxy:
        connector = ProxyConnector.from_url(self.proxy)
    else:
        connector = aiohttp.TCPConnector()

    # The session takes ownership of the connector and closes it on exit.
    async with aiohttp.ClientSession(connector=connector) as session:
        async with session.get(url) as response:
            content = await response.text()

    return feedparser.parse(content)
async def process_room(self, room):
self.logger.log(f"Processing room {room}", "debug")
@ -483,7 +526,7 @@ class RSSBot:
timestamp = 0
try:
feed_content = feedparser.parse(feed)
feed_content = await self.fetch_feed(feed)
new_timestamp = timestamp
for entry in feed_content.entries:
try: