#!/usr/bin/env python3
"""
Discord bot for #remora channel - analyzes links in real-time with Haiku
Fetches content, sends to gateway for AI analysis, adds to Tududi inbox
"""
import discord
import os
import json
import re
import requests
from datetime import datetime
from pathlib import Path
from dotenv import load_dotenv
import logging
from urllib.parse import urlparse
# Load .env file so DISCORD_BOT_TOKEN / TUDUDI_* / OPENCLAW_* vars are available
load_dotenv()

# Setup logging: DEBUG level, mirrored to bot.log (next to this script) and stdout
log_file = Path(__file__).parent / "bot.log"
logging.basicConfig(
    level=logging.DEBUG,
    format='[%(asctime)s] [%(levelname)-8s] %(message)s',
    handlers=[
        logging.FileHandler(log_file),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

# Config
CHANNEL_ID = 1467557082583535729  # Discord channel id of #remora (the only channel processed)
TRACKER_FILE = Path(__file__).parent / "tracker.json"  # persisted processed-message/link state
TUDUDI_API_URL = os.getenv("TUDUDI_API_URL", "https://todo.dilain.com/api/v1")
TUDUDI_API_KEY = os.getenv("TUDUDI_API_KEY")  # required by add_to_tududi(); warns if unset
GATEWAY_URL = os.getenv("OPENCLAW_GATEWAY", "http://127.0.0.1:18789")  # NOTE(review): read but never used below — confirm intended
GATEWAY_TOKEN = os.getenv("OPENCLAW_GATEWAY_TOKEN", "")  # NOTE(review): read but never used below — confirm intended

# Startup banner in the log
logger.info("=" * 60)
logger.info("Bot startup")
logger.info(f" Channel ID: {CHANNEL_ID}")
logger.info(f" Tududi API: {TUDUDI_API_URL}")
logger.info(f" Gateway: {GATEWAY_URL}")
logger.info("=" * 60)
# Load or init tracker
def load_tracker():
    """Load persisted tracker state, or return a fresh structure.

    Returns a dict with keys ``channel_id``, ``processed_message_ids`` and
    ``links``. A corrupt or unreadable tracker file is logged and treated as
    missing so the bot keeps running (it is overwritten on the next save);
    older files missing newer keys are tolerated via setdefault.
    """
    if TRACKER_FILE.exists():
        try:
            with open(TRACKER_FILE, encoding="utf-8") as f:
                data = json.load(f)
            # Tolerate tracker files written by older versions of this script.
            data.setdefault("processed_message_ids", [])
            data.setdefault("links", [])
            return data
        except (json.JSONDecodeError, OSError) as e:
            logger.warning(f"Tracker file unreadable, starting fresh: {e}")
    return {
        "channel_id": CHANNEL_ID,
        "processed_message_ids": [],
        "links": []
    }
def save_tracker(data):
    """Persist tracker state atomically.

    Writes to a temp file next to TRACKER_FILE and renames it into place, so
    a crash mid-write can never leave a truncated/corrupt tracker.json.
    """
    tmp_file = TRACKER_FILE.with_suffix(".json.tmp")
    with open(tmp_file, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2)
    os.replace(tmp_file, TRACKER_FILE)  # atomic rename on POSIX
# Detect links in text
def extract_urls(text):
    """Return every http(s) URL found in *text*, in order of appearance."""
    link_pattern = re.compile(r'https?://[^\s<>"{}|\\^`\[\]]+')
    return link_pattern.findall(text)
# Detect link type
# Known registered domains mapped to a human-readable link category.
_DOMAIN_TYPES = {
    "github.com": "GitHub",
    "reddit.com": "Reddit",
    "youtube.com": "YouTube",
    "youtu.be": "YouTube",
    "tiktok.com": "TikTok",
    "twitter.com": "Twitter/X",
    "x.com": "Twitter/X",
    "medium.com": "Medium",
    "dev.to": "Dev.to",
    "arxiv.org": "arXiv",
}


def detect_link_type(url):
    """Classify *url* by its host; falls back to "Article" for unknown hosts.

    Matches the host exactly or as a subdomain (e.g. "www.github.com").
    The previous substring test (`"x.com" in domain`) misclassified unrelated
    hosts — e.g. netflix.com contains "x.com" and was reported as Twitter/X.
    """
    domain = urlparse(url).netloc.lower()
    # Drop an explicit port so "github.com:443" still matches.
    domain = domain.split(":")[0]
    for known, link_type in _DOMAIN_TYPES.items():
        if domain == known or domain.endswith("." + known):
            return link_type
    return "Article"
# Fetch URL content
def fetch_url_content(url):
    """Fetch *url* and return a dict of title/content/status.

    ``content`` is the first 5000 characters of the response body. The title
    is taken from the <title> tag, then the og:title meta tag, then the last
    URL path segment. Never raises: timeouts come back with status
    "timeout" and any other failure with status "error".
    """
    logger.debug(f" 📥 Fetching: {url}")
    try:
        response = requests.get(
            url,
            timeout=8,
            headers={
                'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36',
                'Accept': 'text/html,application/xhtml+xml'
            },
            allow_redirects=True
        )
        response.raise_for_status()
        content = response.text[:5000]  # First 5k chars
        # Try <title> first, then fall back to the og:title meta tag.
        title = None
        for pattern in (r'<title[^>]*>\s*([^<]+?)\s*</title>',
                        r'<meta\s+property="og:title"\s+content="([^"]+)"'):
            match = re.search(pattern, content, re.IGNORECASE)
            if match:
                candidate = match.group(1).strip()
                if candidate:
                    title = candidate
                    break
        if not title:
            # Last URL path segment, or a placeholder for bare domains.
            title = url.split('/')[-1] or "Untitled"
        logger.debug(f" ✓ Fetched: {title}")
        return {
            "title": title,
            "content": content,
            "status": "ok"
        }
    except requests.Timeout:
        logger.warning(f" ⏱️ Timeout: {url}")
        return {"title": "Request timeout", "status": "timeout", "content": ""}
    except Exception as e:
        logger.error(f" ❌ Error: {e}")
        return {"title": "Fetch failed", "status": "error", "content": ""}
# Analyze with local heuristic (fallback when gateway unavailable)
# Tag chosen per link type; anything unknown defaults to "to-read".
_TYPE_TAGS = {
    "GitHub": "project",
    "YouTube": "video",
    "Reddit": "discussion",
    "Medium": "article",
    "Dev.to": "article",
    "arXiv": "learning",
}


def analyze_content(url, title, content, link_type):
    """Simple local analysis when the gateway is unavailable.

    Returns {"summary": str, "tag": str}. The signature is kept for
    interface compatibility; ``url`` is only logged and ``content`` is
    currently unused by this heuristic. The previous broad try/except that
    returned None was removed: nothing in this body can raise, so the error
    path was dead code (callers still handle a None result defensively).
    """
    logger.debug(f" 🤖 Local analysis: {url}")
    tag = _TYPE_TAGS.get(link_type, "to-read")
    summary = f"lien {link_type.lower()} : {title}"
    logger.info(f" ✓ Local analysis complete")
    return {
        "summary": summary,
        "tag": tag
    }
# Send to Tududi inbox
def add_to_tududi(title, url, link_type, summary="", tag=""):
    """Post a formatted note for *url* to the Tududi inbox API.

    Returns True on HTTP 200/201, False on any failure (missing API key,
    non-success status, request exception). Never raises.
    """
    logger.debug(f" 📌 Adding to Tududi: {title}")
    try:
        if not TUDUDI_API_KEY:
            logger.warning(" TUDUDI_API_KEY not set")
            return False
        # Assemble the note body; summary and tag sections are optional.
        sections = [f"📌 **{link_type}**: {title}\n🔗 {url}"]
        if summary:
            sections.append(f"💡 {summary}")
        if tag:
            sections.append(f"🏷️ {tag}")
        content = "\n\n".join(sections)
        response = requests.post(
            f"{TUDUDI_API_URL}/inbox",
            headers={
                "Authorization": f"Bearer {TUDUDI_API_KEY}",
                "Content-Type": "application/json"
            },
            json={"content": content},
            timeout=5
        )
        if response.status_code not in [200, 201]:
            logger.warning(f" Tududi error: {response.status_code}")
            return False
        logger.info(f" ✓ Added to Tududi")
        return True
    except Exception as e:
        logger.error(f" Tududi error: {e}")
        return False
# Discord bot
intents = discord.Intents.default()
intents.message_content = True  # privileged intent: needed to read message text
class LinkAnalyzerBot(discord.Client):
    """Watches the #remora channel, analyzes posted links and records them."""

    async def on_ready(self):
        logger.info(f"✅ Bot logged in as {self.user}")
        logger.info(f"📍 Watching channel #remora ({CHANNEL_ID})")

    async def on_message(self, message):
        # Ignore bot's own messages
        if message.author == self.user:
            return
        # Only process #remora channel
        if message.channel.id != CHANNEL_ID:
            return
        # Check for URLs
        urls = extract_urls(message.content)
        if not urls:
            logger.debug(f"No URLs in message from {message.author}")
            return
        # Skip if already processed (tracker survives restarts)
        tracker = load_tracker()
        if message.id in tracker["processed_message_ids"]:
            logger.debug(f"Skipping already-processed message {message.id}")
            return
        logger.info(f"🔗 New link(s) from {message.author}: {message.content}")
        # Process each URL; one bad link must not kill the handler.
        for url in urls:
            try:
                await self._process_url(message, url, tracker)
            except Exception as e:
                # logger.exception includes the full traceback automatically.
                logger.exception(f"❌ Error: {e}")
                try:
                    await message.reply(f"❌ Error: {str(e)[:100]}", mention_author=False)
                except Exception:
                    # Best-effort notification only. (Was a bare `except:`,
                    # which also swallowed KeyboardInterrupt/SystemExit.)
                    pass
        # Mark the whole message processed — even if some URLs failed — so
        # it is never reprocessed after a restart.
        tracker["processed_message_ids"].append(message.id)
        save_tracker(tracker)
        logger.info(f"Updated tracker: {len(tracker['links'])} links total")

    async def _process_url(self, message, url, tracker):
        """Fetch, analyze, reply with, and record a single URL from *message*."""
        logger.info(f"Processing: {url}")
        link_type = detect_link_type(url)
        # Fetch content
        fetch_result = fetch_url_content(url)
        title = fetch_result["title"]
        # Analyze only when the fetch succeeded
        analysis_data = None
        if fetch_result["status"] == "ok":
            logger.debug(f" Analyzing...")
            analysis_data = analyze_content(url, title, fetch_result["content"], link_type)
        # Prepare summary/tag with safe defaults
        summary_text = ""
        tag = "interesting"
        if analysis_data:
            summary_text = analysis_data.get("summary", "")
            tag = analysis_data.get("tag", "interesting")
            logger.debug(f" Summary: {summary_text[:60]}")
        # Add to Tududi inbox (failures are logged inside, not fatal here)
        add_to_tududi(title, url, link_type, summary_text, tag)
        # Format and post the channel reply
        response_text = f"📌 **{link_type}**: {title}"
        if summary_text:
            response_text += f"\n\n💡 {summary_text}"
        if tag:
            response_text += f"\n\n🏷️ `{tag}`"
        logger.debug(f"Posting response...")
        await message.reply(response_text, mention_author=False)
        # Record the processed link in the tracker
        tracker["links"].append({
            "url": url,
            "title": title,
            "type": link_type,
            "author": str(message.author),
            "message_id": message.id,
            "date": datetime.now().isoformat(),
            "summary": summary_text,
            "tag": tag
        })
        logger.info(f"✓ Processed: {url}")
# Main
if __name__ == "__main__":
    token = os.getenv("DISCORD_BOT_TOKEN")
    if not token:
        logger.error("❌ DISCORD_BOT_TOKEN not set!")
        # `exit()` is a `site`-module convenience and may be unavailable;
        # raising SystemExit is the portable equivalent.
        raise SystemExit(1)
    logger.info("Starting bot...")
    bot = LinkAnalyzerBot(intents=intents)
    bot.run(token)  # blocks until the bot shuts down