commit dc9f0ed7b89bf35feffea72e68544291f6595c8b Author: Dome Date: Fri Apr 17 09:17:10 2026 +0200 initial form upload diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..57540bd --- /dev/null +++ b/Dockerfile @@ -0,0 +1,24 @@ +# Dockerfile +FROM python:3.11-slim + +# Set working directory +WORKDIR /app + +# Copy and install requirements +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Create data directory +RUN mkdir -p /app/data + +# Create non-root user +RUN useradd -r -u 1000 holmirdas + +# Set ownership +RUN chown -R holmirdas:holmirdas /app + +# Switch to non-root user +USER holmirdas + +# Default command (will be overridden by docker-compose) +CMD ["python", "gts_holmirdas.py"] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..450a765 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 Matthias + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/README.md b/README.md new file mode 100644 index 0000000..b23f68e --- /dev/null +++ b/README.md @@ -0,0 +1,112 @@ +# GTS-HolMirDas šŸš€ + +RSS-based content discovery for [GoToSocial](https://codeberg.org/superseriousbusiness/gotosocial) instances. + +Automatically discovers and federates content from RSS feeds across the Fediverse, helping small GoToSocial instances populate their federated timeline without relying on traditional relays. + +Inspired by the original [HolMirDas](https://github.com/aliceif/HolMirDas) by [@aliceif](https://mkultra.x27.one/@aliceif), adapted for GoToSocial with enhanced Docker deployment and multi-instance processing. + +## ✨ Key Features + +- **šŸ“” Multi-Instance Discovery** - Fetches content from configurable RSS feeds across Fediverse instances +- **⚔ Performance Scaling** - 20-100 posts per feed with URL parameters (`?limit=100`) +- **🐳 Production Ready** - Docker deployment, environment-based config, health monitoring +- **šŸ“Š Comprehensive Stats** - Runtime metrics, federation growth, performance tracking +- **šŸ”§ Zero Maintenance** - Runs automatically every hour with duplicate detection + +## šŸš€ Quick Start + +```bash +# Clone the repository +git clone https://git.klein.ruhr/matthias/gts-holmirdas +cd gts-holmirdas + +# Copy configuration templates +cp .env.example .env +cp rss_feeds.example.txt rss_feeds.txt + +# Edit configuration +nano .env # Add your GTS credentials +nano rss_feeds.txt # Customize RSS feeds + +# Deploy +docker compose up -d + +# Monitor +docker compose logs -f +``` + +## šŸ“ˆ Performance at Scale + +**Real Production Data:** +``` +šŸ“Š Runtime: 8:42 | 487 posts processed | 3,150+ instances discovered +⚔ 56 posts/minute | 102 RSS feeds | +45 new instances per run +šŸ’¾ Resource usage: ~450MB RAM total (GoToSocial + tools) +``` + +**Scaling Options:** +- **Conservative:** 20 posts/feed (~100 posts/run) +- **Balanced:** 50 posts/feed (~300 posts/run) +- **Aggressive:** 100 posts/feed (~600 
posts/run) + +## šŸ› ļø Configuration Essentials + +### Environment Variables (.env) +```bash +# Required +GTS_SERVER_URL=https://your-gts-instance.tld +GTS_ACCESS_TOKEN=your_gts_access_token + +# Performance Tuning +MAX_POSTS_PER_RUN=25 # Posts per feed per run +DELAY_BETWEEN_REQUESTS=1 # Seconds between API calls +LOG_LEVEL=INFO # DEBUG for troubleshooting +``` + +### RSS Feeds (rss_feeds.txt) +```bash +# Use URL parameters to scale performance +https://mastodon.social/tags/homelab.rss?limit=50 +https://fosstodon.org/tags/selfhosting.rss?limit=100 +https://infosec.exchange/tags/security.rss?limit=75 +``` + +### GoToSocial Access Token +1. Login to your GoToSocial instance +2. Settings → Applications → Create new application +3. Required scopes: `read`, `read:search`, `read:statuses` +4. Copy access token to `.env` file + +## šŸ“– Complete Documentation + +For detailed information, visit our **[Wiki](https://git.klein.ruhr/matthias/gts-holmirdas/wiki)**: + +- **[šŸ“‹ Installation Guide](https://git.klein.ruhr/matthias/gts-holmirdas/wiki/Installation-Guide.-)** - Detailed setup, Docker configuration, deployment options +- **[šŸ“ˆ Performance & Scaling](https://git.klein.ruhr/matthias/gts-holmirdas/wiki/Performance-%26-Scaling)** - Optimization tables, scaling strategies, resource planning +- **[šŸ› ļø Troubleshooting](https://git.klein.ruhr/matthias/gts-holmirdas/wiki/Troubleshooting)** - Common issues, Docker problems, debugging guide +- **[āš™ļø Advanced Configuration](https://git.klein.ruhr/matthias/gts-holmirdas/wiki/Advanced-Configuration)** - Environment variables, RSS strategies, production tips +- **[šŸ“Š Monitoring & Stats](https://git.klein.ruhr/matthias/gts-holmirdas/wiki/Monitoring-%26-Stats)** - Understanding output, health monitoring, metrics +- **[ā“ FAQ](https://git.klein.ruhr/matthias/gts-holmirdas/wiki/FAQ+-+Frequently+Asked+Questions.-)** - Common questions and answers + +## šŸ¤ Community & Support + +- **[Contributing Guide](Contributing)** 
- Development setup and contribution guidelines *(coming soon)* +- **Issues**: [Report bugs or request features](https://git.klein.ruhr/matthias/gts-holmirdas/issues) +- **Contact**: [@matthias@me.klein.ruhr](https://me.klein.ruhr/@matthias) on the Fediverse + +## šŸ”— Related Projects + +- **[FediFetcher](https://github.com/nanos/fedifetcher)** - Fetches missing replies and posts +- **[GoToSocial](https://github.com/superseriousbusiness/gotosocial)** - Lightweight ActivityPub server +- **[slurp](https://github.com/VyrCossont/slurp)** - Import posts from other instances + +## šŸ“„ License + +MIT License - see [LICENSE](LICENSE) file for details. + +## šŸ™ Acknowledgments + +- Inspired by [HolMirDas](https://github.com/aliceif/HolMirDas) by [@aliceif](https://mkultra.x27.one/@aliceif) +- Built for the GoToSocial community +- RSS-to-ActivityPub federation approach \ No newline at end of file diff --git a/compose.yml b/compose.yml new file mode 100644 index 0000000..c64bc46 --- /dev/null +++ b/compose.yml @@ -0,0 +1,32 @@ +services: + gts-holmirdas: + build: . + container_name: gts-holmirdas + restart: unless-stopped + + env_file: + - .env + + volumes: + - ./data:/app/data + - ./gts_holmirdas.py:/app/gts_holmirdas.py:ro + - ./rss_feeds.txt:/app/rss_feeds.txt:ro + + # Run every hour (balanced frequency) + entrypoint: > + sh -c " + while true; do + echo 'Starting GTS-HolMirDas run...' + python gts_holmirdas.py + echo 'GTS-HolMirDas run completed. Sleeping for 1 hour...' 
+ sleep 3600 + done + " + + # Resource limits + deploy: + resources: + limits: + memory: 512M + reservations: + memory: 256M diff --git a/gts_holmirdas.py b/gts_holmirdas.py new file mode 100644 index 0000000..77d1eaf --- /dev/null +++ b/gts_holmirdas.py @@ -0,0 +1,281 @@ +#!/usr/bin/env python3 +""" +GTS-HolMirDas: RSS-based content discovery for GoToSocial + +Inspired by HolMirDas by @aliceif: +- GitHub: https://github.com/aliceif/HolMirDas +- Fediverse: @aliceif@mkultra.x27.one + +This GoToSocial adaptation extends the original RSS-to-ActivityPub concept +with Docker deployment, multi-instance processing, and comprehensive monitoring. +""" + +import os +import sys +import time +import json +import logging +import requests +import feedparser +from datetime import timedelta +from urllib.parse import quote_plus + +class GTSHolMirDas: + def __init__(self): + """Initialize the RSS fetcher with configuration""" + self.config = { + "server_url": os.getenv("GTS_SERVER_URL", "https://your-gts-instance"), + "access_token": os.getenv("GTS_ACCESS_TOKEN", ""), + "max_posts_per_run": int(os.getenv("MAX_POSTS_PER_RUN", "25")), + "delay_between_requests": int(os.getenv("DELAY_BETWEEN_REQUESTS", "2")), + "healthcheck_url": os.getenv("HEALTHCHECK_URL", ""), + "log_level": os.getenv("LOG_LEVEL", "INFO") + } + + # Setup logging FIRST + logging.basicConfig( + level=getattr(logging, self.config["log_level"]), + format='%(asctime)s - %(levelname)s - %(message)s' + ) + self.logger = logging.getLogger(__name__) + + # Load RSS URLs from file or environment + rss_urls_file = os.getenv("RSS_URLS_FILE") + if rss_urls_file and os.path.exists(rss_urls_file): + # Load from file + try: + with open(rss_urls_file, 'r') as f: + self.config["rss_urls"] = [ + line.split('#', 1)[0].strip() for line in f + if line.strip() and not line.strip().startswith('#') + ] + self.logger.info(f"Loaded {len(self.config['rss_urls'])} RSS URLs from file: {rss_urls_file}") + except Exception as e: + 
self.logger.error(f"Could not load RSS URLs from file {rss_urls_file}: {e}") + self.config["rss_urls"] = [] + else: + # Fallback to environment variable + self.config["rss_urls"] = [ + url.strip() for url in os.getenv("RSS_URLS", "").split(",") + if url.strip() + ] + if self.config["rss_urls"]: + self.logger.info(f"Loaded {len(self.config['rss_urls'])} RSS URLs from environment") + + # Load processed URLs from persistent storage + self.processed_urls_file = "/app/data/processed_urls.json" + self.processed_urls = self.load_processed_urls() + + # Statistics tracking + self.previous_instances = getattr(self, 'previous_instances', 0) + + def load_processed_urls(self): + """Load previously processed URLs and instance count from file""" + try: + if os.path.exists(self.processed_urls_file): + with open(self.processed_urls_file, 'r') as f: + data = json.load(f) + # Load previous instance count for statistics + self.previous_instances = data.get('previous_instances', 0) + return set(data.get('processed_urls', [])) + except Exception as e: + self.logger.warning(f"Could not load processed URLs: {e}") + + return set() + + def save_processed_urls(self, current_instances=None): + """Save processed URLs and current instance count to file""" + try: + os.makedirs(os.path.dirname(self.processed_urls_file), exist_ok=True) + data = { + 'processed_urls': list(self.processed_urls), + 'last_updated': time.time() + } + # Save current instance count for next run + if current_instances is not None and current_instances != 'unknown': + data['previous_instances'] = current_instances + + with open(self.processed_urls_file, 'w') as f: + json.dump(data, f, indent=2) + except Exception as e: + self.logger.error(f"Could not save processed URLs: {e}") + + def fetch_rss_urls(self, rss_url): + """Fetch URLs from RSS feed""" + try: + self.logger.info(f"Fetching RSS feed: {rss_url}") + + # Parse RSS feed + feed = feedparser.parse(rss_url) + + if feed.bozo: + self.logger.warning(f"RSS feed may have 
issues: {rss_url}") + + # Extract URLs from entries + urls = [] + for entry in feed.entries: + if hasattr(entry, 'link'): + urls.append(entry.link) + + self.logger.info(f"Found {len(urls)} URLs in RSS feed") + return urls + + except Exception as e: + self.logger.error(f"Error fetching RSS feed {rss_url}: {e}") + return [] + + def lookup_post(self, post_url): + """Look up a post URL using GTS search API""" + try: + # Prepare search API call + search_url = f"{self.config['server_url']}/api/v2/search" + params = { + 'q': post_url, + 'type': 'statuses', + 'resolve': 'true', + 'limit': 1 + } + headers = { + 'Authorization': f'Bearer {self.config["access_token"]}', + 'Content-Type': 'application/json' + } + + # Make API call + response = requests.get( + search_url, + params=params, + headers=headers, + timeout=30 + ) + + if response.status_code == 200: + results = response.json() + if results.get('statuses') or results.get('accounts'): + self.logger.info(f"Successfully looked up: {post_url}") + return True + else: + self.logger.warning(f"No results for: {post_url}") + return False + else: + self.logger.error(f"API error {response.status_code} for {post_url}: {response.text}") + return False + + except requests.exceptions.RequestException as e: + self.logger.error(f"Error looking up {post_url}: {e}") + return False + + def process_feeds(self): + """Process all configured RSS feeds""" + total_processed = 0 + + # Record start time for statistics + self.start_time = time.time() + + # Ping healthcheck start + self.ping_healthcheck("/start") + + try: + for rss_url in self.config["rss_urls"]: + if not rss_url.strip(): + continue + + self.logger.info(f"Processing feed: {rss_url}") + + # Get URLs from RSS + urls = self.fetch_rss_urls(rss_url) + + # Filter out already processed URLs + new_urls = [url for url in urls if url not in self.processed_urls] + + if not new_urls: + self.logger.info("No new URLs to process") + continue + + # Rate limiting: max posts per run + 
urls_to_process = new_urls[:self.config["max_posts_per_run"]] + + self.logger.info(f"Processing {len(urls_to_process)} new URLs") + + for url in urls_to_process: + if self.lookup_post(url): + self.processed_urls.add(url) + total_processed += 1 + + # Rate limiting: delay between requests + time.sleep(self.config["delay_between_requests"]) + + # Calculate runtime + end_time = time.time() + runtime_seconds = end_time - self.start_time + runtime_formatted = str(timedelta(seconds=int(runtime_seconds))) + + # Get current instance count + try: + instance_info = requests.get(f"{self.config['server_url']}/api/v1/instance", + headers={'Authorization': f'Bearer {self.config["access_token"]}'}, + timeout=10) + if instance_info.status_code == 200: + current_instances = instance_info.json().get('stats', {}).get('domain_count', 'unknown') + else: + current_instances = 'unknown' + except Exception as e: + self.logger.error(f"Failed to get instance count: {e}") + current_instances = 'unknown' + + # Calculate new instances (if we have previous data) + new_instances = 'unknown' + if self.previous_instances > 0 and current_instances != 'unknown': + new_instances = current_instances - self.previous_instances + + # Print comprehensive statistics + print(f"\nšŸ“Š GTS-HolMirDas Run Statistics:") + print(f" ā±ļø Runtime: {runtime_formatted}") + print(f" šŸ“„ Total posts processed: {total_processed}") + print(f" 🌐 Current known instances: {current_instances}") + if new_instances != 'unknown' and new_instances > 0: + print(f" āž• New instances discovered: +{new_instances}") + elif new_instances == 0: + print(f" āž• New instances discovered: +0") + print(f" šŸ“” RSS feeds processed: {len(self.config['rss_urls'])}") + if runtime_seconds > 60: + print(f" ⚔ Posts per minute: {total_processed / (runtime_seconds / 60):.1f}") + + self.save_processed_urls(current_instances) + + # Ping healthcheck success + self.ping_healthcheck("") + + except Exception as e: + self.logger.error(f"Error during 
processing: {e}") + # Ping healthcheck failure + self.ping_healthcheck("/fail") + raise + + def ping_healthcheck(self, endpoint=""): + """Ping healthchecks.io for monitoring""" + if not self.config.get("healthcheck_url"): + return + + try: + url = self.config["healthcheck_url"] + endpoint + requests.get(url, timeout=10) + except Exception as e: + self.logger.warning(f"Failed to ping healthcheck: {e}") + +def main(): + """Main entry point""" + try: + fetcher = GTSHolMirDas() + + # Validate required config + if not fetcher.config["access_token"]: + raise ValueError("GTS_ACCESS_TOKEN environment variable is required") + + fetcher.process_feeds() + + except Exception as e: + logging.error(f"Fatal error: {e}") + raise + +if __name__ == "__main__": + main() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..f2d32a5 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +requests==2.31.0 +feedparser==6.0.10 +urllib3==2.0.7 diff --git a/rss_feeds.example.txt b/rss_feeds.example.txt new file mode 100644 index 0000000..d532f51 --- /dev/null +++ b/rss_feeds.example.txt @@ -0,0 +1,17 @@ +# Example RSS feeds - customize for your interests + +# Add ?limit=X parameter to increase posts per feed (default: 20, max: 100) +# Higher limits = more content discovery, but longer processing time +# Performance tip: Start with limit=50, then increase to 100 if needed + +# homelab (up to 100 posts per feed) +https://mastodon.social/tags/homelab.rss # 20 posts/feed (default) +https://fosstodon.org/tags/homelab.rss?limit=50 # 50 posts/feed + +# selfhosting (up to 100 posts per feed) +https://mastodon.social/tags/selfhosting.rss?limit=100 # 100 posts/feed +https://infosec.exchange/tags/selfhosting.rss?limit=100 # 100 posts/feed + +# docker (up to 100 posts per feed) +https://social.tchncs.de/tags/docker.rss?limit=100 # 100 posts/feed +https://fosstodon.org/tags/docker.rss?limit=100 # 100 posts/feed