diff --git a/bot.py b/bot.py
index 46ca311..3ba7807 100644
--- a/bot.py
+++ b/bot.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
"""
-Discord bot for #remora channel - analyzes links in real-time with web_fetch + AI
-Posts summaries, adds to Tududi inbox, maintains JSON history + logs
+Discord bot for #remora channel - analyzes links in real-time with Haiku
+Fetches content, sends to gateway for AI analysis, adds to Tududi inbox, records each link in the tracker
"""
import discord
@@ -88,9 +88,9 @@ def detect_link_type(url):
else:
return "Article"
-# Fetch URL content using requests
+# Fetch URL content
def fetch_url_content(url):
- """Fetch URL and return title + excerpt"""
+ """Fetch URL and return a result dict (status, title, content)"""
logger.debug(f" š„ Fetching: {url}")
try:
@@ -104,229 +104,78 @@ def fetch_url_content(url):
allow_redirects=True
)
response.raise_for_status()
- content = response.text[:4000] # First 4k chars
+ content = response.text[:5000] # First 5k chars
- # Try multiple patterns for title
+ # Try to find title
title = None
-
- # Pattern 1:
tag
title_match = re.search(r']*>\s*([^<]+?)\s*', content, re.IGNORECASE)
if title_match:
title = title_match.group(1).strip()
- # Pattern 2: og:title meta tag (for GitHub, etc.)
if not title:
og_match = re.search(r']*>([^<]+)', content, re.IGNORECASE)
- if h1_match:
- title = h1_match.group(1).strip()
-
- # Fallback
if not title:
title = url.split('/')[-1] or "Untitled"
- # Extract meta description
- desc_match = re.search(r' 50:
- logger.debug(f" Got README: {len(content)} chars after cleaning")
- return content[:2000]
-
- # Fallback: get repo info JSON
- response = requests.get(
- f"https://api.github.com/repos/{owner}/{repo}",
- timeout=5
- )
-
- if response.status_code == 200:
- data = response.json()
- # Collect useful info
- parts = []
- if data.get("description"):
- parts.append(data["description"])
- if data.get("topics"):
- parts.append(f"Topics: {', '.join(data['topics'][:3])}")
- if data.get("language"):
- parts.append(f"Language: {data['language']}")
-
- content = " ".join(parts)
- if content:
- logger.debug(f" Got repo info: {len(content)} chars")
- return content
-
- except Exception as e:
- logger.warning(f" GitHub API error: {e}")
-
- return None
-
-# Extract clean text from HTML
-def extract_text_from_html(html):
- """Extract readable text from HTML"""
- # Remove DOCTYPE, comments
- text = re.sub(r']*>', '', html, flags=re.IGNORECASE)
- text = re.sub(r'', '', text, flags=re.DOTALL)
-
- # Remove scripts, styles, noscript
- text = re.sub(r'', ' ', text, flags=re.DOTALL | re.IGNORECASE)
- text = re.sub(r'', ' ', text, flags=re.DOTALL | re.IGNORECASE)
- text = re.sub(r'', ' ', text, flags=re.DOTALL | re.IGNORECASE)
-
- # Try to extract main content areas first
- main_match = re.search(r'<(main|article|div[^>]*class="[^"]*content[^"]*"[^>]*)>.*?\1>', text, flags=re.DOTALL | re.IGNORECASE)
- if main_match:
- text = main_match.group(0)
-
- # Remove common nav/footer/sidebar patterns
- text = re.sub(r'<(nav|footer|aside|header)[^>]*>.*?\1>', ' ', text, flags=re.DOTALL | re.IGNORECASE)
-
- # Remove remaining HTML tags but keep text
- text = re.sub(r'
', '\n', text, flags=re.IGNORECASE)
- text = re.sub(r']*>', '\n', text, flags=re.IGNORECASE)
- text = re.sub(r']*>', '\n', text, flags=re.IGNORECASE)
- text = re.sub(r'?[^>]+>', ' ', text)
-
- # Decode HTML entities
- text = text.replace(' ', ' ')
- text = text.replace('<', '<')
- text = text.replace('>', '>')
- text = text.replace('&', '&')
-
- # Clean up whitespace
- text = re.sub(r'\s+', ' ', text)
- text = text.strip()
-
- # Return first meaningful chunk
- if len(text) < 100:
- return None
-
- return text[:2000] # First 2000 chars of clean text
-
-# Analyze content with Haiku via gateway
+# Analyze with Haiku via gateway
def analyze_content(url, title, content, link_type):
- """Analyze content with AI to create intelligent summary"""
- logger.debug(f" š¤ Analyzing content: {url}")
+ """Send raw page content to Haiku via the gateway; return a dict with summary and tag, or None on failure"""
+ logger.debug(f" š¤ Analyzing with Haiku: {url}")
try:
- # Special handling for GitHub
- clean_text = None
- if link_type == "GitHub":
- clean_text = get_github_content(url)
-
- # Fallback: extract from HTML
- if not clean_text:
- clean_text = extract_text_from_html(content)
-
- if not clean_text:
- logger.warning(f" Could not extract content")
- return {
- "summary": f"GitHub project: {title}",
- "tag": "project",
- "relevance": "relevant"
- }
-
- logger.debug(f" Extracted {len(clean_text)} chars of content")
-
- # Build analysis prompt
- prompt = f"""Analyze this webpage and create a brief summary for Laurent.
+ # Simple prompt - send raw content
+ prompt = f"""Analyze this webpage content quickly.
-**Title**: {title}
**URL**: {url}
-**Link Type**: {link_type}
+**Title**: {title}
-**Content** (first 1500 chars):
-{clean_text[:1500]}
+**RAW PAGE CONTENT** (first 3000 chars):
+{content[:3000]}
---
-Create a 2-3 sentence summary that answers:
-1. What is this page about?
-2. Why would Laurent find it useful?
+Write a 2-3 sentence summary explaining:
+1. What is this about?
+2. Why would this be useful?
-Keep it practical and concise. Do NOT include the URL or title in the summary.
-"""
+Be concise. Skip marketing. No URLs or titles in summary."""
- # Call gateway with a simple POST
- logger.debug(f" Sending to gateway for analysis...")
+ logger.debug(f" Sending to Haiku...")
response = requests.post(
- "http://127.0.0.1:18789/sessions/turn",
+ f"{GATEWAY_URL}/sessions/turn",
json={
"message": prompt,
"session": "main"
},
- timeout=15,
+ timeout=20,
headers={"Authorization": f"Bearer {GATEWAY_TOKEN}"} if GATEWAY_TOKEN else {}
)
if response.status_code == 200:
result = response.json()
- # Extract the summary from response
- summary = result.get("message", "") or result.get("content", "")
- if isinstance(summary, list):
- summary = summary[0].get("text", "") if summary else ""
- summary = summary.strip()[:300]
+ summary = result.get("message", "")
+ if isinstance(summary, list) and summary:
+ summary = summary[0].get("text", "") if isinstance(summary[0], dict) else summary[0]
+ summary = str(summary).strip()[:300]
- logger.info(f" ā Got summary from gateway: {summary[:60]}")
+ logger.info(f" ā Got analysis: {summary[:50]}")
- # Determine tag from link type
tag = "to-read"
if link_type == "GitHub":
tag = "project"
@@ -341,61 +190,19 @@ Keep it practical and concise. Do NOT include the URL or title in the summary.
return {
"summary": summary,
- "tag": tag,
- "relevance": "relevant"
+ "tag": tag
}
else:
- logger.warning(f" Gateway error {response.status_code}, falling back to heuristic")
- # Fallback: use simple heuristic
- return {
- "summary": extract_simple_summary(clean_text, title, link_type),
- "tag": get_tag_from_type(link_type),
- "relevance": "relevant"
- }
+ logger.warning(f" Gateway error: {response.status_code}")
+ return None
- except requests.Timeout:
- logger.warning(f" Gateway timeout, using fallback")
- return {
- "summary": extract_simple_summary(content, title, link_type),
- "tag": get_tag_from_type(link_type),
- "relevance": "relevant"
- }
except Exception as e:
logger.error(f" Analysis error: {e}")
- import traceback
- logger.error(traceback.format_exc())
- return {
- "summary": title,
- "tag": "interesting",
- "relevance": "relevant"
- }
-
-def extract_simple_summary(text, title, link_type):
- """Fallback: extract a simple summary from text"""
- # Get first non-empty sentence/paragraph
- sentences = re.split(r'[.!?]', text)
- for sent in sentences:
- sent = sent.strip()
- if len(sent) > 20 and len(sent) < 300:
- return sent[:200]
- return title
-
-def get_tag_from_type(link_type):
- """Get tag based on link type"""
- tags = {
- "GitHub": "project",
- "YouTube": "video",
- "Reddit": "discussion",
- "Medium": "article",
- "Dev.to": "article",
- "arXiv": "learning",
- "Twitter/X": "discussion"
- }
- return tags.get(link_type, "to-read")
+ return None
# Send to Tududi inbox
def add_to_tududi(title, url, link_type, summary="", tag=""):
- """Add to Tududi inbox with intelligent summary"""
+ """Add to Tududi inbox"""
logger.debug(f" š Adding to Tududi: {title}")
try:
@@ -403,14 +210,11 @@ def add_to_tududi(title, url, link_type, summary="", tag=""):
logger.warning(" TUDUDI_API_KEY not set")
return False
- # Format the inbox content
content = f"š **{link_type}**: {title}\nš {url}"
-
if summary:
- content += f"\n\nš” **Summary**:\n{summary}"
-
+ content += f"\n\nš” {summary}"
if tag:
- content += f"\n\nš·ļø **Tag**: {tag}"
+ content += f"\n\nš·ļø {tag}"
response = requests.post(
f"{TUDUDI_API_URL}/inbox",
@@ -422,8 +226,8 @@ def add_to_tududi(title, url, link_type, summary="", tag=""):
timeout=5
)
- if response.status_code in [200, 201]: # 200 or 201 are both OK
- logger.info(f" ā Added to Tududi inbox with tag: {tag}")
+ if response.status_code in [200, 201]:
+ logger.info(f" ā Added to Tududi")
return True
else:
logger.warning(f" Tududi error: {response.status_code}")
@@ -474,38 +278,31 @@ class LinkAnalyzerBot(discord.Client):
fetch_result = fetch_url_content(url)
title = fetch_result["title"]
- # Analyze content if fetch was successful
+ # Analyze with Haiku
analysis_data = None
- logger.debug(f" š Fetch status: {fetch_result['status']}")
-
if fetch_result["status"] == "ok":
- logger.debug(f" š Starting analysis...")
- analysis_data = analyze_content(url, title, fetch_result.get("content", ""), link_type)
- logger.debug(f" Analysis result: {analysis_data}")
- else:
- logger.debug(f" ā ļø Fetch failed, skipping analysis")
+ logger.debug(f" Analyzing...")
+ analysis_data = analyze_content(url, title, fetch_result["content"], link_type)
- # Prepare summary for Tududi
+ # Prepare summary
summary_text = ""
tag = "interesting"
if analysis_data:
summary_text = analysis_data.get("summary", "")
tag = analysis_data.get("tag", "interesting")
- logger.debug(f" ā Got summary: {summary_text[:80]}")
- else:
- logger.warning(f" ā No analysis data returned")
+ logger.debug(f" Summary: {summary_text[:60]}")
- # Add to Tududi with summary
- tududi_ok = add_to_tududi(title, url, link_type, summary_text, tag)
+ # Add to Tududi
+ add_to_tududi(title, url, link_type, summary_text, tag)
- # Format response for Discord
+ # Format response
response_text = f"š **{link_type}**: {title}"
if summary_text:
response_text += f"\n\nš” {summary_text}"
if tag:
- response_text += f"\n\nš·ļø Tag: `{tag}`"
+ response_text += f"\n\nš·ļø `{tag}`"
- logger.debug(f"Posting response: {response_text}")
+ logger.debug(f"Posting response...")
# Post in channel
await message.reply(response_text, mention_author=False)
@@ -518,21 +315,25 @@ class LinkAnalyzerBot(discord.Client):
"author": str(message.author),
"message_id": message.id,
"date": datetime.now().isoformat(),
- "analysis": analysis_data,
- "tududi": tududi_ok,
- "fetch_status": fetch_result["status"]
+ "summary": summary_text,
+ "tag": tag
})
logger.info(f"ā Processed: {url}")
except Exception as e:
- logger.error(f"ā Error processing {url}: {e}")
- await message.reply(f"ā Error analyzing link: {e}", mention_author=False)
+ logger.error(f"ā Error: {e}")
+ import traceback
+ logger.error(traceback.format_exc())
+ try:
+ await message.reply(f"ā Error: {str(e)[:100]}", mention_author=False)
+ except:
+ pass
- # Update processed IDs
+ # Update tracker
tracker["processed_message_ids"].append(message.id)
save_tracker(tracker)
- logger.info(f"Updated tracker, total links: {len(tracker['links'])}")
+ logger.info(f"Updated tracker: {len(tracker['links'])} links total")
# Main
if __name__ == "__main__":