Update gts_holmirdas.py
This commit is contained in:
+15
-11
@@ -6,7 +6,7 @@ import json
|
||||
import logging
|
||||
import requests
|
||||
import feedparser
|
||||
from datetime import timedelta
|
||||
from datetime import timedelta, datetime
|
||||
|
||||
class GTSHolMirDas:
|
||||
def __init__(self):
|
||||
@@ -27,6 +27,7 @@ class GTSHolMirDas:
|
||||
)
|
||||
self.logger = logging.getLogger(__name__)
|
||||
self.db_path = os.getenv("DATABASE_PATH", "/app/data/processed_urls.json")
|
||||
|
||||
self.processed_urls, self.previous_instances = self.load_state()
|
||||
|
||||
self.session = requests.Session()
|
||||
@@ -46,7 +47,7 @@ class GTSHolMirDas:
|
||||
def load_state(self):
|
||||
if os.path.exists(self.db_path):
|
||||
try:
|
||||
with open(self.db_path, 'r') as f:
|
||||
with open(self.db_path, 'r', encoding='utf-8') as f:
|
||||
data = json.load(f)
|
||||
return set(data.get('processed_urls', [])), data.get('previous_instances', 0)
|
||||
except Exception as e:
|
||||
@@ -57,17 +58,19 @@ class GTSHolMirDas:
|
||||
try:
|
||||
os.makedirs(os.path.dirname(self.db_path), exist_ok=True)
|
||||
url_list = list(self.processed_urls)[-5000:]
|
||||
with open(self.db_path, 'w') as f:
|
||||
with open(self.db_path, 'w', encoding='utf-8') as f:
|
||||
json.dump({'processed_urls': url_list, 'previous_instances': current_instances}, f, indent=2)
|
||||
except Exception as e:
|
||||
self.logger.error(f"Save error: {e}")
|
||||
|
||||
def process_feeds(self):
|
||||
self.logger.info(f"📂 Starte Suchlauf. Datenbank: {self.db_path}")
|
||||
|
||||
if not os.path.exists(self.config["rss_urls_file"]):
|
||||
self.logger.error("RSS_URLS_FILE fehlt!")
|
||||
return
|
||||
|
||||
with open(self.config["rss_urls_file"], 'r') as f:
|
||||
with open(self.config["rss_urls_file"], 'r', encoding='utf-8') as f:
|
||||
rss_urls = [l.split('#')[0].strip() for l in f if l.strip() and not l.strip().startswith('#')]
|
||||
|
||||
total_new = 0
|
||||
@@ -87,7 +90,6 @@ class GTSHolMirDas:
|
||||
if new_links:
|
||||
for url in new_links[:self.config["max_posts_per_run"]]:
|
||||
try:
|
||||
# Timeout auf 30s erhöht, um "Read timed out" zu vermeiden
|
||||
r = self.session.get(
|
||||
f"{self.config['server_url']}/api/v2/search",
|
||||
params={'q': url, 'resolve': 'true', 'limit': 1},
|
||||
@@ -104,13 +106,11 @@ class GTSHolMirDas:
|
||||
except Exception as e:
|
||||
self.logger.error(f"Fehler bei Post {url}: {e}")
|
||||
|
||||
# OPTIMIERUNG: Speichert nach jedem Feed, wenn neue Posts gefunden wurden
|
||||
self.save_state(self.previous_instances)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Fehler bei Feed {rss_url}: {e}")
|
||||
|
||||
# Instanz-Statistiken am Ende des gesamten Runs
|
||||
try:
|
||||
ri = self.session.get(f"{self.config['server_url']}/api/v1/instance", timeout=10)
|
||||
curr = ri.json().get('stats', {}).get('domain_count', 0)
|
||||
@@ -123,12 +123,16 @@ class GTSHolMirDas:
|
||||
self.save_state(curr)
|
||||
|
||||
def run_forever(self):
|
||||
wait = self.parse_interval(self.config["fetch_interval"])
|
||||
self.logger.info(f"GTS-Federator aktiv (Intervall: {self.config['fetch_interval']})")
|
||||
wait_seconds = self.parse_interval(self.config["fetch_interval"])
|
||||
self.logger.info(f"GTS-Federator aktiv. Intervall: {self.config['fetch_interval']}")
|
||||
while True:
|
||||
self.process_feeds()
|
||||
self.logger.info(f"Nächster Run in {self.config['fetch_interval']}...")
|
||||
time.sleep(wait)
|
||||
|
||||
next_run = datetime.now() + timedelta(seconds=wait_seconds)
|
||||
self.logger.info(f"💤 Run abgeschlossen. Gehe für {self.config['fetch_interval']} in die Pause.")
|
||||
self.logger.info(f"⏰ Nächster geplanter Durchlauf: {next_run.strftime('%H:%M:%S')}")
|
||||
|
||||
time.sleep(wait_seconds)
|
||||
|
||||
if __name__ == "__main__":
|
||||
bot = GTSHolMirDas()
|
||||
|
||||
Reference in New Issue
Block a user