#!/usr/bin/env python3
"""
Discord bot for #remora channel - analyzes links in real-time with web_fetch + AI
Posts summaries, adds to Tududi inbox, maintains JSON history + logs
"""
import discord
import os
import json
import re
import sys
import requests
from datetime import datetime
from pathlib import Path
from dotenv import load_dotenv
import logging
from urllib.parse import urlparse

# Load .env file
load_dotenv()

# Setup logging: everything goes both to bot.log (next to this script) and stderr.
log_file = Path(__file__).parent / "bot.log"
logging.basicConfig(
    level=logging.DEBUG,
    format='[%(asctime)s] [%(levelname)-8s] %(message)s',
    handlers=[
        logging.FileHandler(log_file),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

# Config
CHANNEL_ID = 1467557082583535729  # the #remora channel this bot watches
TRACKER_FILE = Path(__file__).parent / "tracker.json"
TUDUDI_API_URL = os.getenv("TUDUDI_API_URL", "https://todo.dilain.com/api/v1")
TUDUDI_API_KEY = os.getenv("TUDUDI_API_KEY")
GATEWAY_URL = os.getenv("OPENCLAW_GATEWAY", "http://127.0.0.1:18789")
GATEWAY_TOKEN = os.getenv("OPENCLAW_GATEWAY_TOKEN", "")

logger.info("=" * 60)
logger.info("Bot startup")
logger.info(f"  Channel ID: {CHANNEL_ID}")
logger.info(f"  Tududi API: {TUDUDI_API_URL}")
logger.info(f"  Gateway: {GATEWAY_URL}")
logger.info("=" * 60)


def load_tracker():
    """Load tracker.json, or return a fresh empty structure if it doesn't exist.

    Schema: {"channel_id": int, "processed_message_ids": [int], "links": [dict]}.
    """
    if TRACKER_FILE.exists():
        with open(TRACKER_FILE) as f:
            return json.load(f)
    return {
        "channel_id": CHANNEL_ID,
        "processed_message_ids": [],
        "links": []
    }


def save_tracker(data):
    """Persist the tracker structure back to tracker.json (pretty-printed)."""
    with open(TRACKER_FILE, "w") as f:
        json.dump(data, f, indent=2)


def extract_urls(text):
    """Return all http(s) URLs found in *text*."""
    url_pattern = r'https?://[^\s<>"{}|\\^`\[\]]+'
    return re.findall(url_pattern, text)


def detect_link_type(url):
    """Classify a URL into a coarse source type based on its domain."""
    domain = urlparse(url).netloc.lower()
    if "github.com" in domain:
        return "GitHub"
    elif "reddit.com" in domain:
        return "Reddit"
    elif "youtube.com" in domain or "youtu.be" in domain:
        return "YouTube"
    elif "tiktok.com" in domain:
        return "TikTok"
    elif "twitter.com" in domain or "x.com" in domain:
        return "Twitter/X"
    elif "medium.com" in domain:
        return "Medium"
    elif "dev.to" in domain:
        return "Dev.to"
    elif "arxiv.org" in domain:
        return "arXiv"
    else:
        return "Article"


def fetch_url_content(url):
    """Fetch URL and return title + excerpt.

    Returns a dict with keys:
      - "status":  "ok" on success, "error" on any failure
      - "title":   best-effort page title (never empty)
      - "content": first ~4k chars of the response body ("" on failure)
    NOTE(review): regex-based HTML scraping is best-effort only; the patterns
    below were reconstructed from a corrupted source copy — verify against a
    known-good revision if available.
    """
    logger.debug(f"  šŸ“„ Fetching: {url}")
    try:
        response = requests.get(
            url,
            timeout=8,
            headers={
                'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36',
                'Accept': 'text/html,application/xhtml+xml'
            },
            allow_redirects=True
        )
        response.raise_for_status()

        content = response.text[:4000]  # First 4k chars

        # Try multiple patterns for title
        title = None

        # Pattern 1: <title> tag
        title_match = re.search(r'<title[^>]*>\s*([^<]+?)\s*</title>',
                                content, re.IGNORECASE)
        if title_match:
            title = title_match.group(1).strip()

        # Pattern 2: og:title meta tag (for GitHub, etc.)
        if not title:
            og_match = re.search(
                r'<meta[^>]*property=["\']og:title["\'][^>]*content=["\']([^"\']+)',
                content, re.IGNORECASE)
            if og_match:
                title = og_match.group(1).strip()

        # Pattern 3: first <h1>
        if not title:
            h1_match = re.search(r'<h1[^>]*>([^<]+)</h1>', content, re.IGNORECASE)
            if h1_match:
                title = h1_match.group(1).strip()

        # Fallback: last URL path segment
        if not title:
            title = url.split('/')[-1] or "Untitled"

        return {"status": "ok", "title": title, "content": content}
    except Exception as e:
        logger.error(f"  Fetch error: {e}")
        # Degrade gracefully: caller still gets a usable title.
        return {
            "status": "error",
            "title": url.split('/')[-1] or "Untitled",
            "content": ""
        }


def analyze_content(url, title, content, link_type):
    """Derive a short summary and a Tududi tag from fetched page content.

    Returns {"summary": str, "tag": str, "relevance": "relevant"}; never raises
    (falls back to a minimal analysis on error).
    """
    try:
        # Extract a description: <meta name="description">, else first <p>.
        description = ""
        desc_match = re.search(
            r'<meta[^>]*name=["\']description["\'][^>]*content=["\']([^"\']+)',
            content, re.IGNORECASE)
        if desc_match:
            description = desc_match.group(1).strip()[:200]
        if not description:
            p_match = re.search(r'<p[^>]*>([^<]+)</p>', content, re.IGNORECASE)
            if p_match:
                description = p_match.group(1).strip()[:200]

        # Determine tag based on content + URL + type
        tag = "interesting"
        summary = ""
        if link_type == "GitHub":
            tag = "project"
            summary = f"GitHub repository: {title}"
            # Try to extract more info from README
            readme_match = re.search(r'README[^<]*<[^>]*>([^<]+)',
                                     content, re.IGNORECASE)
            if readme_match:
                summary += f". {readme_match.group(1)[:100]}"
        elif link_type == "YouTube":
            tag = "video"
            summary = f"Video: {title}"
            if description:
                summary += f". {description[:80]}"
        elif link_type == "Reddit":
            tag = "discussion"
            summary = f"Reddit discussion: {title}"
        elif link_type in ("Medium", "Dev.to"):
            tag = "article"
            summary = f"Article: {title}"
            if description:
                summary += f". {description[:80]}"
        elif link_type == "arXiv":
            tag = "learning"
            summary = f"Research paper: {title}"
        else:
            # Generic web article
            tag = "to-read"
            summary = title
            if description:
                summary += f". {description[:100]}"

        # Truncate summary to reasonable length
        summary = summary[:200]
        logger.info(f"  āœ“ Analysis complete - Tag: {tag}, Summary: {summary[:60]}")

        result = {
            "summary": summary,
            "tag": tag,
            "relevance": "relevant"
        }
        logger.debug(f"  Returning: {result}")
        return result
    except Exception as e:
        logger.error(f"  Analysis error: {e}")
        import traceback
        logger.error(traceback.format_exc())
        # Return minimal analysis
        return {
            "summary": title,
            "tag": "interesting",
            "relevance": "relevant"
        }


def add_to_tududi(title, url, link_type, summary="", tag=""):
    """Add to Tududi inbox with intelligent summary.

    Returns True on HTTP 200/201, False on any failure (never raises).
    """
    logger.debug(f"  šŸ“Œ Adding to Tududi: {title}")
    try:
        if not TUDUDI_API_KEY:
            logger.warning("  TUDUDI_API_KEY not set")
            return False

        # Format the inbox content
        content = f"šŸ“Œ **{link_type}**: {title}\nšŸ”— {url}"
        if summary:
            content += f"\n\nšŸ’” **Summary**:\n{summary}"
        if tag:
            content += f"\n\nšŸ·ļø **Tag**: {tag}"

        response = requests.post(
            f"{TUDUDI_API_URL}/inbox",
            headers={
                "Authorization": f"Bearer {TUDUDI_API_KEY}",
                "Content-Type": "application/json"
            },
            json={"content": content},
            timeout=5
        )
        if response.status_code in [200, 201]:  # 200 or 201 are both OK
            logger.info(f"  āœ“ Added to Tududi inbox with tag: {tag}")
            return True
        else:
            logger.warning(f"  Tududi error: {response.status_code}")
            return False
    except Exception as e:
        logger.error(f"  Tududi error: {e}")
        return False


# Discord bot
intents = discord.Intents.default()
intents.message_content = True


class LinkAnalyzerBot(discord.Client):
    async def on_ready(self):
        logger.info(f"āœ… Bot logged in as {self.user}")
        logger.info(f"šŸ“ Watching channel #remora ({CHANNEL_ID})")

    async def on_message(self, message):
        """Handle a new message: detect links, fetch+analyze each, reply,
        push to Tududi, and record everything in tracker.json.

        NOTE(review): fetch_url_content / add_to_tududi use blocking
        requests calls; they stall the event loop while running. Consider
        asyncio.to_thread() if throughput ever matters.
        """
        # Ignore bot's own messages
        if message.author == self.user:
            return
        # Only process #remora channel
        if message.channel.id != CHANNEL_ID:
            return

        # Check for URLs
        urls = extract_urls(message.content)
        if not urls:
            logger.debug(f"No URLs in message from {message.author}")
            return

        # Skip if already processed
        tracker = load_tracker()
        if message.id in tracker["processed_message_ids"]:
            logger.debug(f"Skipping already-processed message {message.id}")
            return

        logger.info(f"šŸ”— New link(s) from {message.author}: {message.content}")

        # Process each URL
        for url in urls:
            try:
                logger.info(f"Processing: {url}")
                link_type = detect_link_type(url)

                # Fetch content
                fetch_result = fetch_url_content(url)
                title = fetch_result["title"]

                # Analyze content if fetch was successful
                analysis_data = None
                logger.debug(f"  šŸ“Š Fetch status: {fetch_result['status']}")
                if fetch_result["status"] == "ok":
                    logger.debug(f"  šŸ” Starting analysis...")
                    analysis_data = analyze_content(
                        url, title, fetch_result.get("content", ""), link_type)
                    logger.debug(f"  Analysis result: {analysis_data}")
                else:
                    logger.debug(f"  āš ļø Fetch failed, skipping analysis")

                # Prepare summary for Tududi
                summary_text = ""
                tag = "interesting"
                if analysis_data:
                    summary_text = analysis_data.get("summary", "")
                    tag = analysis_data.get("tag", "interesting")
                    logger.debug(f"  āœ“ Got summary: {summary_text[:80]}")
                else:
                    logger.warning(f"  āŒ No analysis data returned")

                # Add to Tududi with summary
                tududi_ok = add_to_tududi(title, url, link_type, summary_text, tag)

                # Format response for Discord
                response_text = f"šŸ“Œ **{link_type}**: {title}"
                if summary_text:
                    response_text += f"\n\nšŸ’” {summary_text}"
                if tag:
                    response_text += f"\n\nšŸ·ļø Tag: `{tag}`"

                logger.debug(f"Posting response: {response_text}")

                # Post in channel
                await message.reply(response_text, mention_author=False)

                # Update tracker
                tracker["links"].append({
                    "url": url,
                    "title": title,
                    "type": link_type,
                    "author": str(message.author),
                    "message_id": message.id,
                    "date": datetime.now().isoformat(),
                    "analysis": analysis_data,
                    "tududi": tududi_ok,
                    "fetch_status": fetch_result["status"]
                })
                logger.info(f"āœ“ Processed: {url}")
            except Exception as e:
                logger.error(f"āŒ Error processing {url}: {e}")
                await message.reply(f"āŒ Error analyzing link: {e}",
                                    mention_author=False)

        # Update processed IDs
        tracker["processed_message_ids"].append(message.id)
        save_tracker(tracker)
        logger.info(f"Updated tracker, total links: {len(tracker['links'])}")


# Main
if __name__ == "__main__":
    token = os.getenv("DISCORD_BOT_TOKEN")
    if not token:
        logger.error("āŒ DISCORD_BOT_TOKEN not set!")
        sys.exit(1)

    logger.info("Starting bot...")
    bot = LinkAnalyzerBot(intents=intents)
    bot.run(token)