From 03999875b52d73f865ca74d71ab5e153522098db Mon Sep 17 00:00:00 2001 From: Remora Date: Mon, 9 Feb 2026 18:46:45 +0100 Subject: [PATCH] Feat: Add web_fetch, AI analysis (Haiku), and comprehensive logging --- bot.py | 253 +++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 184 insertions(+), 69 deletions(-) diff --git a/bot.py b/bot.py index 0399a90..a375a32 100644 --- a/bot.py +++ b/bot.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """ -Discord bot for #remora channel - analyzes links in real-time -Posts summaries, adds to Tududi inbox, maintains JSON history +Discord bot for #remora channel - analyzes links in real-time with web_fetch + AI +Posts summaries, adds to Tududi inbox, maintains JSON history + logs """ import discord @@ -12,17 +12,38 @@ import requests from datetime import datetime from pathlib import Path from dotenv import load_dotenv +import logging +from urllib.parse import urlparse # Load .env file load_dotenv() +# Setup logging +log_file = Path(__file__).parent / "bot.log" +logging.basicConfig( + level=logging.DEBUG, + format='[%(asctime)s] [%(levelname)-8s] %(message)s', + handlers=[ + logging.FileHandler(log_file), + logging.StreamHandler() + ] +) +logger = logging.getLogger(__name__) + # Config CHANNEL_ID = 1467557082583535729 TRACKER_FILE = Path(__file__).parent / "tracker.json" TUDUDI_API_URL = os.getenv("TUDUDI_API_URL", "https://todo.dilain.com/api/v1") TUDUDI_API_KEY = os.getenv("TUDUDI_API_KEY") GATEWAY_URL = os.getenv("OPENCLAW_GATEWAY", "http://127.0.0.1:18789") -GATEWAY_TOKEN = os.getenv("OPENCLAW_GATEWAY_TOKEN") +GATEWAY_TOKEN = os.getenv("OPENCLAW_GATEWAY_TOKEN", "") + +logger.info("=" * 60) +logger.info("Bot startup") +logger.info(f" Channel ID: {CHANNEL_ID}") +logger.info(f" Tududi API: {TUDUDI_API_URL}") +logger.info(f" Gateway: {GATEWAY_URL}") +logger.info("=" * 60) # Load or init tracker def load_tracker(): @@ -44,56 +65,127 @@ def extract_urls(text): url_pattern = r'https?://[^\s<>"{}|\\^`\[\]]+' return 
re.findall(url_pattern, text) -# Fetch and analyze URL -def analyze_url(url): - """Fetch URL and create summary""" +# Detect link type +def detect_link_type(url): + domain = urlparse(url).netloc.lower() + + if "github.com" in domain: + return "GitHub" + elif "reddit.com" in domain: + return "Reddit" + elif "youtube.com" in domain or "youtu.be" in domain: + return "YouTube" + elif "tiktok.com" in domain: + return "TikTok" + elif "twitter.com" in domain or "x.com" in domain: + return "Twitter/X" + elif "medium.com" in domain: + return "Medium" + elif "dev.to" in domain: + return "Dev.to" + elif "arxiv.org" in domain: + return "arXiv" + else: + return "Article" + +# Fetch URL content using requests +def fetch_url_content(url): + """Fetch URL and return title + excerpt""" + logger.debug(f" šŸ“„ Fetching: {url}") + + try: - print(f" šŸ“„ Fetching: {url}") - response = requests.get(url, timeout=5, headers={ - 'User-Agent': 'Mozilla/5.0' - }) - content = response.text[:2000] # First 2k chars + response = requests.get( + url, + timeout=5, + headers={'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64)'}, + allow_redirects=True + ) + response.raise_for_status() + content = response.text[:3000] # First 3k chars # Extract title title_match = re.search(r'<title[^>]*>([^<]+)</title>', content, re.IGNORECASE) - title = title_match.group(1).strip() if title_match else url.split('/')[-1] + title = title_match.group(1).strip() if title_match else "No title found" - # Simple content type detection - link_type = "webpage" - if "github.com" in url: - link_type = "GitHub" - elif "reddit.com" in url: - link_type = "Reddit" - elif "youtube.com" in url or "youtu.be" in url: - link_type = "YouTube" - elif "tiktok.com" in url: - link_type = "TikTok" - elif "twitter.com" in url or "x.com" in url: - link_type = "Twitter/X" + # Extract meta description + desc_match = re.search(r'