Feat: Add web_fetch, AI analysis (Haiku), and comprehensive logging

This commit is contained in:
Remora
2026-02-09 18:46:45 +01:00
parent 23548be6ad
commit 03999875b5

229
bot.py
View File

@@ -1,7 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
""" """
Discord bot for #remora channel - analyzes links in real-time Discord bot for #remora channel - analyzes links in real-time with web_fetch + AI
Posts summaries, adds to Tududi inbox, maintains JSON history Posts summaries, adds to Tududi inbox, maintains JSON history + logs
""" """
import discord import discord
@@ -12,17 +12,38 @@ import requests
from datetime import datetime from datetime import datetime
from pathlib import Path from pathlib import Path
from dotenv import load_dotenv from dotenv import load_dotenv
import logging
from urllib.parse import urlparse
# Load .env file # Load .env file
load_dotenv() load_dotenv()
# Setup logging
# Log lines go both to bot.log (next to this file) and to the console.
# encoding="utf-8" keeps the emoji-laden messages from crashing or being
# mangled on platforms whose default file encoding is not UTF-8.
log_file = Path(__file__).parent / "bot.log"
logging.basicConfig(
    level=logging.DEBUG,
    format='[%(asctime)s] [%(levelname)-8s] %(message)s',
    handlers=[
        logging.FileHandler(log_file, encoding="utf-8"),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

# Config
CHANNEL_ID = 1467557082583535729
TRACKER_FILE = Path(__file__).parent / "tracker.json"
TUDUDI_API_URL = os.getenv("TUDUDI_API_URL", "https://todo.dilain.com/api/v1")
TUDUDI_API_KEY = os.getenv("TUDUDI_API_KEY")
GATEWAY_URL = os.getenv("OPENCLAW_GATEWAY", "http://127.0.0.1:18789")
GATEWAY_TOKEN = os.getenv("OPENCLAW_GATEWAY_TOKEN", "")

# Startup banner so each run is visually delimited in the shared log file
logger.info("=" * 60)
logger.info("Bot startup")
logger.info(f" Channel ID: {CHANNEL_ID}")
logger.info(f" Tududi API: {TUDUDI_API_URL}")
logger.info(f" Gateway: {GATEWAY_URL}")
logger.info("=" * 60)
# Load or init tracker # Load or init tracker
def load_tracker(): def load_tracker():
@@ -44,56 +65,127 @@ def extract_urls(text):
url_pattern = r'https?://[^\s<>"{}|\\^`\[\]]+' url_pattern = r'https?://[^\s<>"{}|\\^`\[\]]+'
return re.findall(url_pattern, text) return re.findall(url_pattern, text)
# Detect link type
def detect_link_type(url):
    """Classify *url* into a coarse source label based on its host.

    The host is compared exactly (or as a subdomain, e.g. "www.github.com"
    matches "github.com") instead of by substring, so a spoofed host like
    "github.com.evil.tld" no longer classifies as GitHub. An explicit port
    ("github.com:443") is stripped before matching. Unknown hosts fall back
    to the generic "Article" label.
    """
    # Ordered (hosts, label) table; first match wins.
    host_labels = (
        (("github.com",), "GitHub"),
        (("reddit.com",), "Reddit"),
        (("youtube.com", "youtu.be"), "YouTube"),
        (("tiktok.com",), "TikTok"),
        (("twitter.com", "x.com"), "Twitter/X"),
        (("medium.com",), "Medium"),
        (("dev.to",), "Dev.to"),
        (("arxiv.org",), "arXiv"),
    )
    domain = urlparse(url).netloc.lower()
    domain = domain.split(":", 1)[0]  # drop an explicit port, if any
    for hosts, label in host_labels:
        if any(domain == h or domain.endswith("." + h) for h in hosts):
            return label
    return "Article"
# Fetch URL content using requests
def fetch_url_content(url):
    """Fetch *url* and return a dict describing the page.

    Every return path now shares the same key set — "title", "description",
    "content", "status" (plus "error" on unexpected failures) — so callers
    can index or .get() without guarding per status. Only the first 3000
    characters of the response body are kept and scanned.
    """
    logger.debug(f" 📥 Fetching: {url}")
    try:
        response = requests.get(
            url,
            timeout=5,
            headers={'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64)'},
            allow_redirects=True
        )
        response.raise_for_status()
        content = response.text[:3000]  # First 3k chars

        # Extract title
        title_match = re.search(r'<title[^>]*>([^<]+)</title>', content, re.IGNORECASE)
        title = title_match.group(1).strip() if title_match else "No title found"

        # Extract meta description
        desc_match = re.search(r'<meta\s+name="description"\s+content="([^"]+)"', content, re.IGNORECASE)
        description = desc_match.group(1) if desc_match else ""

        logger.debug(f" ✓ Fetched: {title}")
        return {
            "title": title,
            "description": description,
            "content": content,
            "status": "ok"
        }
    except requests.Timeout:
        logger.warning(f" ⏱️ Timeout: {url}")
        return {"title": "Request timeout", "description": "", "status": "timeout", "content": ""}
    except requests.HTTPError as e:
        # raise_for_status() guarantees e.response is set here.
        logger.warning(f" ❌ HTTP {e.response.status_code}: {url}")
        return {"title": f"HTTP {e.response.status_code}", "description": "", "status": "http_error", "content": ""}
    except Exception as e:
        logger.error(f" ❌ Error: {e}")
        return {"title": "Fetch failed", "description": "", "status": "error", "error": str(e), "content": ""}
# Analyze with OpenClaw gateway (Haiku)
def analyze_with_gateway(url, title, content):
    """Send the fetched page to the OpenClaw gateway for a short AI summary.

    Returns the analysis text on success, or None on any gateway error,
    timeout, or malformed response; callers treat None as "no analysis".
    """
    logger.debug(f" 🤖 Analyzing with gateway: {url}")
    # Build analysis prompt
    prompt = f"""Analyze this webpage briefly (2-3 sentences max):
URL: {url}
Title: {title}
Content excerpt:
{content[:1000]}
Provide:
1. What is this about? (1 sentence)
2. Who should read this? (optional)
3. Suggested Tududi tag (e.g., "to-read", "learning", "inspiration", "tool")
Keep it concise!"""
    try:
        response = requests.post(
            f"{GATEWAY_URL}/v1/messages",
            headers={"Authorization": f"Bearer {GATEWAY_TOKEN}"} if GATEWAY_TOKEN else {},
            json={
                "messages": [{"role": "user", "content": prompt}],
                "model": "openrouter/anthropic/claude-haiku-4.5",
                "max_tokens": 150
            },
            timeout=10
        )
        if response.status_code == 200:
            result = response.json()
            # Guard against an empty/absent "content" list: the old
            # `result.get("content", [{}])[0]` raised IndexError on
            # "content": [] and was then mislogged as a timeout.
            blocks = result.get("content") or [{}]
            analysis = blocks[0].get("text", "")
            logger.debug(" ✓ Analysis complete")
            return analysis
        logger.warning(f" Gateway error: {response.status_code}")
        return None
    except Exception as e:
        logger.warning(f" Gateway timeout/error: {e}")
        return None
# Send to Tududi inbox # Send to Tududi inbox
def add_to_tududi(title, url, link_type): def add_to_tududi(title, url, link_type, analysis=""):
"""Add to Tududi inbox with summary""" """Add to Tududi inbox with summary"""
logger.debug(f" 📌 Adding to Tududi: {title}")
try: try:
if not TUDUDI_API_KEY: if not TUDUDI_API_KEY:
print(" ⚠️ TUDUDI_API_KEY not set") logger.warning(" TUDUDI_API_KEY not set")
return False return False
content = f"📌 {link_type}: {title}\n🔗 {url}" content = f"📌 **{link_type}**: {title}\n🔗 {url}"
if analysis:
content += f"\n\n💡 Summary:\n{analysis}"
response = requests.post( response = requests.post(
f"{TUDUDI_API_URL}/inbox", f"{TUDUDI_API_URL}/inbox",
@@ -106,13 +198,13 @@ def add_to_tududi(title, url, link_type):
) )
if response.status_code == 200: if response.status_code == 200:
print(f" Added to Tududi: {title}") logger.info(f" Added to Tududi inbox")
return True return True
else: else:
print(f" ⚠️ Tududi error: {response.status_code}") logger.warning(f" Tududi error: {response.status_code}")
return False return False
except Exception as e: except Exception as e:
print(f" Tududi error: {e}") logger.error(f" Tududi error: {e}")
return False return False
# Discord bot # Discord bot
@@ -121,8 +213,8 @@ intents.message_content = True
class LinkAnalyzerBot(discord.Client): class LinkAnalyzerBot(discord.Client):
async def on_ready(self): async def on_ready(self):
print(f"✅ Bot logged in as {self.user}") logger.info(f"✅ Bot logged in as {self.user}")
print(f"📍 Watching channel #remora ({CHANNEL_ID})") logger.info(f"📍 Watching channel #remora ({CHANNEL_ID})")
async def on_message(self, message): async def on_message(self, message):
# Ignore bot's own messages # Ignore bot's own messages
@@ -136,55 +228,78 @@ class LinkAnalyzerBot(discord.Client):
# Check for URLs # Check for URLs
urls = extract_urls(message.content) urls = extract_urls(message.content)
if not urls: if not urls:
logger.debug(f"No URLs in message from {message.author}")
return return
# Skip if already processed # Skip if already processed
tracker = load_tracker() tracker = load_tracker()
if message.id in tracker["processed_message_ids"]: if message.id in tracker["processed_message_ids"]:
logger.debug(f"Skipping already-processed message {message.id}")
return return
print(f"🔗 New link from {message.author}: {message.content}") logger.info(f"🔗 New link(s) from {message.author}: {message.content}")
# Process each URL # Process each URL
for url in urls: for url in urls:
print(f" Processing: {url}") try:
logger.info(f"Processing: {url}")
link_type = detect_link_type(url)
# Analyze # Fetch content
analysis = analyze_url(url) fetch_result = fetch_url_content(url)
title = fetch_result["title"]
# Analyze with gateway
analysis = None
if fetch_result["status"] == "ok":
analysis = analyze_with_gateway(url, title, fetch_result.get("content", ""))
# Add to Tududi # Add to Tududi
add_to_tududi(analysis["title"], url, analysis["type"]) tududi_ok = add_to_tududi(title, url, link_type, analysis or "")
# Prepare response # Format response for Discord
summary = f"📌 **{analysis['type']}**: {analysis['title']}" response_text = f"📌 **{link_type}**: {title}"
if analysis["status"] == "error": if analysis:
summary += f"\n⚠️ {analysis['error']}" # Truncate to 200 chars for Discord
summary = analysis[:200].split('\n')[0]
response_text += f"\n💡 {summary}"
# Post summary in channel logger.debug(f"Posting response: {response_text}")
await message.reply(summary, mention_author=False)
# Add to tracker # Post in channel
await message.reply(response_text, mention_author=False)
# Update tracker
tracker["links"].append({ tracker["links"].append({
"url": url, "url": url,
"title": analysis["title"], "title": title,
"type": analysis["type"], "type": link_type,
"author": str(message.author), "author": str(message.author),
"message_id": message.id, "message_id": message.id,
"date": datetime.now().isoformat(), "date": datetime.now().isoformat(),
"tududi": True "analysis": analysis,
"tududi": tududi_ok,
"fetch_status": fetch_result["status"]
}) })
logger.info(f"✓ Processed: {url}")
except Exception as e:
logger.error(f"❌ Error processing {url}: {e}")
await message.reply(f"❌ Error analyzing link: {e}", mention_author=False)
# Update processed IDs # Update processed IDs
tracker["processed_message_ids"].append(message.id) tracker["processed_message_ids"].append(message.id)
save_tracker(tracker) save_tracker(tracker)
logger.info(f"Updated tracker, total links: {len(tracker['links'])}")
# Main
if __name__ == "__main__":
    token = os.getenv("DISCORD_BOT_TOKEN")
    if not token:
        logger.error("❌ DISCORD_BOT_TOKEN not set!")
        # raise SystemExit instead of the bare exit() builtin: exit() is
        # installed by the `site` module and is absent under `python -S`
        # or in frozen/embedded interpreters.
        raise SystemExit(1)
    logger.info("Starting bot...")
    bot = LinkAnalyzerBot(intents=intents)
    bot.run(token)