Feat: Add web_fetch, AI analysis (Haiku), and comprehensive logging
This commit is contained in:
229
bot.py
229
bot.py
@@ -1,7 +1,7 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""
|
"""
|
||||||
Discord bot for #remora channel - analyzes links in real-time
|
Discord bot for #remora channel - analyzes links in real-time with web_fetch + AI
|
||||||
Posts summaries, adds to Tududi inbox, maintains JSON history
|
Posts summaries, adds to Tududi inbox, maintains JSON history + logs
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import discord
|
import discord
|
||||||
@@ -12,17 +12,38 @@ import requests
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
import logging
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
# Load .env file
|
# Load .env file
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
|
# Setup logging
# Every record is written both to bot.log (next to this file) and to the
# console stream, using one shared format.
log_file = Path(__file__).parent / "bot.log"
logging.basicConfig(
    level=logging.DEBUG,
    format='[%(asctime)s] [%(levelname)-8s] %(message)s',
    handlers=[
        # Explicit UTF-8: the bot's log messages contain emoji and other
        # non-ASCII characters, which cannot be encoded when the file falls
        # back to a non-UTF-8 locale encoding.
        logging.FileHandler(log_file, encoding="utf-8"),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# Config
|
# Config
|
||||||
CHANNEL_ID = 1467557082583535729
|
CHANNEL_ID = 1467557082583535729
|
||||||
TRACKER_FILE = Path(__file__).parent / "tracker.json"
|
TRACKER_FILE = Path(__file__).parent / "tracker.json"
|
||||||
TUDUDI_API_URL = os.getenv("TUDUDI_API_URL", "https://todo.dilain.com/api/v1")
|
TUDUDI_API_URL = os.getenv("TUDUDI_API_URL", "https://todo.dilain.com/api/v1")
|
||||||
TUDUDI_API_KEY = os.getenv("TUDUDI_API_KEY")
|
TUDUDI_API_KEY = os.getenv("TUDUDI_API_KEY")
|
||||||
GATEWAY_URL = os.getenv("OPENCLAW_GATEWAY", "http://127.0.0.1:18789")
|
GATEWAY_URL = os.getenv("OPENCLAW_GATEWAY", "http://127.0.0.1:18789")
|
||||||
GATEWAY_TOKEN = os.getenv("OPENCLAW_GATEWAY_TOKEN")
|
GATEWAY_TOKEN = os.getenv("OPENCLAW_GATEWAY_TOKEN", "")
|
||||||
|
|
||||||
|
logger.info("=" * 60)
|
||||||
|
logger.info("Bot startup")
|
||||||
|
logger.info(f" Channel ID: {CHANNEL_ID}")
|
||||||
|
logger.info(f" Tududi API: {TUDUDI_API_URL}")
|
||||||
|
logger.info(f" Gateway: {GATEWAY_URL}")
|
||||||
|
logger.info("=" * 60)
|
||||||
|
|
||||||
# Load or init tracker
|
# Load or init tracker
|
||||||
def load_tracker():
|
def load_tracker():
|
||||||
@@ -44,56 +65,127 @@ def extract_urls(text):
|
|||||||
url_pattern = r'https?://[^\s<>"{}|\\^`\[\]]+'
|
url_pattern = r'https?://[^\s<>"{}|\\^`\[\]]+'
|
||||||
return re.findall(url_pattern, text)
|
return re.findall(url_pattern, text)
|
||||||
|
|
||||||
# Fetch and analyze URL
|
# Detect link type
|
||||||
def analyze_url(url):
|
def detect_link_type(url):
    """Classify a URL by the site it points to.

    The host is compared against a table of known sites. A host matches a
    site when it is exactly that domain or one of its subdomains, so
    ``www.youtube.com`` is "YouTube" while ``netflix.com`` is NOT "Twitter/X"
    (plain substring matching would treat it as ``x.com``).

    Args:
        url: The URL string to classify.

    Returns:
        One of "GitHub", "Reddit", "YouTube", "TikTok", "Twitter/X",
        "Medium", "Dev.to", "arXiv", or "Article" when no known site
        matches or the URL cannot be parsed.
    """
    known_sites = (
        (("github.com",), "GitHub"),
        (("reddit.com",), "Reddit"),
        (("youtube.com", "youtu.be"), "YouTube"),
        (("tiktok.com",), "TikTok"),
        (("twitter.com", "x.com"), "Twitter/X"),
        (("medium.com",), "Medium"),
        (("dev.to",), "Dev.to"),
        (("arxiv.org",), "arXiv"),
    )

    try:
        # .hostname drops any userinfo/port and is already lower-cased.
        host = (urlparse(url).hostname or "").lower()
    except ValueError:
        # urlparse rejects some malformed netlocs; treat those as generic.
        return "Article"

    for domains, label in known_sites:
        for domain in domains:
            if host == domain or host.endswith("." + domain):
                return label
    return "Article"
|
||||||
|
|
||||||
|
# Fetch URL content using requests
|
||||||
|
def fetch_url_content(url):
    """Fetch a URL and return its title, meta description and an excerpt.

    Only the first 3000 characters of the response body are kept and
    searched, so a <title> or description tag that appears later in the
    document is not found.

    Args:
        url: The URL to fetch.

    Returns:
        A dict that always has the keys "title", "description", "content"
        and "status". "status" is "ok", "timeout", "http_error" or "error";
        for the generic "error" case an "error" key holds the exception text.
        On any failure "description" and "content" are empty strings.
    """
    import html  # local import: only needed to decode entities below

    logger.debug(f" 📥 Fetching: {url}")

    try:
        response = requests.get(
            url,
            timeout=5,
            headers={'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64)'},
            allow_redirects=True
        )
        response.raise_for_status()
        content = response.text[:3000]  # First 3k chars

        # Extract title. Entities are decoded and runs of whitespace/newlines
        # collapsed so the title reads cleanly when posted as a single line.
        title_match = re.search(r'<title[^>]*>([^<]+)</title>', content, re.IGNORECASE)
        title = " ".join(html.unescape(title_match.group(1)).split()) if title_match else ""
        if not title:
            # Covers both a missing <title> and one that is only whitespace.
            title = "No title found"

        # Extract meta description (same clean-up as the title)
        desc_match = re.search(r'<meta\s+name="description"\s+content="([^"]+)"', content, re.IGNORECASE)
        description = " ".join(html.unescape(desc_match.group(1)).split()) if desc_match else ""

        logger.debug(f" ✓ Fetched: {title}")
        return {
            "title": title,
            "description": description,
            "content": content,
            "status": "ok"
        }
    except requests.Timeout:
        logger.warning(f" ⏱️ Timeout: {url}")
        return {"title": "Request timeout", "description": "", "status": "timeout", "content": ""}
    except requests.HTTPError as e:
        # e.response can be None when the exception was raised without one.
        status_code = e.response.status_code if e.response is not None else "error"
        logger.warning(f" ❌ HTTP {status_code}: {url}")
        return {"title": f"HTTP {status_code}", "description": "", "status": "http_error", "content": ""}
    except Exception as e:
        # Best-effort boundary: any other failure is reported, not raised.
        logger.error(f" ❌ Error: {e}")
        return {"title": "Fetch failed", "description": "", "status": "error", "error": str(e), "content": ""}
|
||||||
"title": "Couldn't fetch",
|
|
||||||
"type": "unknown",
|
# Analyze with OpenClaw gateway (Haiku)
|
||||||
"status": "error",
|
def analyze_with_gateway(url, title, content):
    """Ask the OpenClaw gateway for a short AI analysis of a fetched page.

    Posts a prompt built from the URL, the title and the first 1000
    characters of ``content`` to ``{GATEWAY_URL}/v1/messages``. A bearer
    token header is sent only when GATEWAY_TOKEN is non-empty.

    Args:
        url: The page URL (included in the prompt).
        title: The page title (included in the prompt).
        content: Page text; may be empty or None.

    Returns:
        The analysis text from the gateway reply, or None when the request
        fails, the gateway answers with a non-200 status, or the reply
        carries no text.
    """
    logger.debug(f" 🤖 Analyzing with gateway: {url}")

    # Tolerate a missing excerpt instead of raising before the try block.
    excerpt = (content or "")[:1000]

    # Build analysis prompt
    prompt = f"""Analyze this webpage briefly (2-3 sentences max):

URL: {url}
Title: {title}

Content excerpt:
{excerpt}

Provide:
1. What is this about? (1 sentence)
2. Who should read this? (optional)
3. Suggested Tududi tag (e.g., "to-read", "learning", "inspiration", "tool")

Keep it concise!"""

    try:
        response = requests.post(
            f"{GATEWAY_URL}/v1/messages",
            headers={"Authorization": f"Bearer {GATEWAY_TOKEN}"} if GATEWAY_TOKEN else {},
            json={
                "messages": [{"role": "user", "content": prompt}],
                "model": "openrouter/anthropic/claude-haiku-4.5",
                "max_tokens": 150
            },
            timeout=10
        )

        if response.status_code != 200:
            logger.warning(f" Gateway error: {response.status_code}")
            return None

        # The reply is read as {"content": [{"text": ...}, ...]}. An empty or
        # missing list is handled here so it is not reported as a timeout.
        blocks = response.json().get("content") or []
        first_block = blocks[0] if blocks else {}
        analysis = first_block.get("text", "") if isinstance(first_block, dict) else ""
        if not analysis:
            logger.warning(" Gateway returned no analysis text")
            return None

        logger.debug(f" ✓ Analysis complete")
        return analysis
    except Exception as e:
        # Best-effort: analysis is optional, so failures never propagate.
        logger.warning(f" Gateway timeout/error: {e}")
        return None
|
||||||
|
|
||||||
# Send to Tududi inbox
|
# Send to Tududi inbox
|
||||||
def add_to_tududi(title, url, link_type):
|
def add_to_tududi(title, url, link_type, analysis=""):
|
||||||
"""Add to Tududi inbox with summary"""
|
"""Add to Tududi inbox with summary"""
|
||||||
|
logger.debug(f" 📌 Adding to Tududi: {title}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if not TUDUDI_API_KEY:
|
if not TUDUDI_API_KEY:
|
||||||
print(" ⚠️ TUDUDI_API_KEY not set")
|
logger.warning(" TUDUDI_API_KEY not set")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
content = f"📌 {link_type}: {title}\n🔗 {url}"
|
content = f"📌 **{link_type}**: {title}\n🔗 {url}"
|
||||||
|
if analysis:
|
||||||
|
content += f"\n\n💡 Summary:\n{analysis}"
|
||||||
|
|
||||||
response = requests.post(
|
response = requests.post(
|
||||||
f"{TUDUDI_API_URL}/inbox",
|
f"{TUDUDI_API_URL}/inbox",
|
||||||
@@ -106,13 +198,13 @@ def add_to_tududi(title, url, link_type):
|
|||||||
)
|
)
|
||||||
|
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
print(f" ✅ Added to Tududi: {title}")
|
logger.info(f" ✓ Added to Tududi inbox")
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
print(f" ⚠️ Tududi error: {response.status_code}")
|
logger.warning(f" Tududi error: {response.status_code}")
|
||||||
return False
|
return False
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f" ❌ Tududi error: {e}")
|
logger.error(f" Tududi error: {e}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Discord bot
|
# Discord bot
|
||||||
@@ -121,8 +213,8 @@ intents.message_content = True
|
|||||||
|
|
||||||
class LinkAnalyzerBot(discord.Client):
|
class LinkAnalyzerBot(discord.Client):
|
||||||
async def on_ready(self):
|
async def on_ready(self):
|
||||||
print(f"✅ Bot logged in as {self.user}")
|
logger.info(f"✅ Bot logged in as {self.user}")
|
||||||
print(f"📍 Watching channel #remora ({CHANNEL_ID})")
|
logger.info(f"📍 Watching channel #remora ({CHANNEL_ID})")
|
||||||
|
|
||||||
async def on_message(self, message):
|
async def on_message(self, message):
|
||||||
# Ignore bot's own messages
|
# Ignore bot's own messages
|
||||||
@@ -136,55 +228,78 @@ class LinkAnalyzerBot(discord.Client):
|
|||||||
# Check for URLs
|
# Check for URLs
|
||||||
urls = extract_urls(message.content)
|
urls = extract_urls(message.content)
|
||||||
if not urls:
|
if not urls:
|
||||||
|
logger.debug(f"No URLs in message from {message.author}")
|
||||||
return
|
return
|
||||||
|
|
||||||
# Skip if already processed
|
# Skip if already processed
|
||||||
tracker = load_tracker()
|
tracker = load_tracker()
|
||||||
if message.id in tracker["processed_message_ids"]:
|
if message.id in tracker["processed_message_ids"]:
|
||||||
|
logger.debug(f"Skipping already-processed message {message.id}")
|
||||||
return
|
return
|
||||||
|
|
||||||
print(f"🔗 New link from {message.author}: {message.content}")
|
logger.info(f"🔗 New link(s) from {message.author}: {message.content}")
|
||||||
|
|
||||||
# Process each URL
|
# Process each URL
|
||||||
for url in urls:
|
for url in urls:
|
||||||
print(f" Processing: {url}")
|
try:
|
||||||
|
logger.info(f"Processing: {url}")
|
||||||
|
link_type = detect_link_type(url)
|
||||||
|
|
||||||
# Analyze
|
# Fetch content
|
||||||
analysis = analyze_url(url)
|
fetch_result = fetch_url_content(url)
|
||||||
|
title = fetch_result["title"]
|
||||||
|
|
||||||
|
# Analyze with gateway
|
||||||
|
analysis = None
|
||||||
|
if fetch_result["status"] == "ok":
|
||||||
|
analysis = analyze_with_gateway(url, title, fetch_result.get("content", ""))
|
||||||
|
|
||||||
# Add to Tududi
|
# Add to Tududi
|
||||||
add_to_tududi(analysis["title"], url, analysis["type"])
|
tududi_ok = add_to_tududi(title, url, link_type, analysis or "")
|
||||||
|
|
||||||
# Prepare response
|
# Format response for Discord
|
||||||
summary = f"📌 **{analysis['type']}**: {analysis['title']}"
|
response_text = f"📌 **{link_type}**: {title}"
|
||||||
if analysis["status"] == "error":
|
if analysis:
|
||||||
summary += f"\n⚠️ {analysis['error']}"
|
# Truncate to 200 chars for Discord
|
||||||
|
summary = analysis[:200].split('\n')[0]
|
||||||
|
response_text += f"\n💡 {summary}"
|
||||||
|
|
||||||
# Post summary in channel
|
logger.debug(f"Posting response: {response_text}")
|
||||||
await message.reply(summary, mention_author=False)
|
|
||||||
|
|
||||||
# Add to tracker
|
# Post in channel
|
||||||
|
await message.reply(response_text, mention_author=False)
|
||||||
|
|
||||||
|
# Update tracker
|
||||||
tracker["links"].append({
|
tracker["links"].append({
|
||||||
"url": url,
|
"url": url,
|
||||||
"title": analysis["title"],
|
"title": title,
|
||||||
"type": analysis["type"],
|
"type": link_type,
|
||||||
"author": str(message.author),
|
"author": str(message.author),
|
||||||
"message_id": message.id,
|
"message_id": message.id,
|
||||||
"date": datetime.now().isoformat(),
|
"date": datetime.now().isoformat(),
|
||||||
"tududi": True
|
"analysis": analysis,
|
||||||
|
"tududi": tududi_ok,
|
||||||
|
"fetch_status": fetch_result["status"]
|
||||||
})
|
})
|
||||||
|
|
||||||
|
logger.info(f"✓ Processed: {url}")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"❌ Error processing {url}: {e}")
|
||||||
|
await message.reply(f"❌ Error analyzing link: {e}", mention_author=False)
|
||||||
|
|
||||||
# Update processed IDs
|
# Update processed IDs
|
||||||
tracker["processed_message_ids"].append(message.id)
|
tracker["processed_message_ids"].append(message.id)
|
||||||
save_tracker(tracker)
|
save_tracker(tracker)
|
||||||
|
logger.info(f"Updated tracker, total links: {len(tracker['links'])}")
|
||||||
|
|
||||||
# Main
|
# Main
|
||||||
if __name__ == "__main__":
    # Script entry point: read the Discord token from the environment and
    # start the bot; abort with exit status 1 when the token is missing.
    token = os.getenv("DISCORD_BOT_TOKEN")
    if not token:
        logger.error("❌ DISCORD_BOT_TOKEN not set!")
        # SystemExit instead of exit(): exit() is a helper injected by the
        # site module for interactive use and is not guaranteed to exist.
        raise SystemExit(1)

    logger.info("Starting bot...")
    bot = LinkAnalyzerBot(intents=intents)
    bot.run(token)
|
||||||
|
|||||||
Reference in New Issue
Block a user