Feature: Real AI-powered summaries via gateway (explains content + utility)

This commit is contained in:
Remora
2026-02-09 19:17:37 +01:00
parent e42e5ca563
commit d0ca96191c

182
bot.py
View File

@@ -157,99 +157,137 @@ def fetch_url_content(url):
logger.error(f" ❌ Error: {e}")
return {"title": "Fetch failed", "status": "error", "error": str(e), "content": ""}
# Analyze content (local heuristic-based)
# Extract clean text from HTML
def extract_text_from_html(html):
    """Reduce an HTML document to plain readable text.

    Script and style elements are dropped entirely, remaining tags are
    replaced with spaces, and runs of whitespace are collapsed.  Returns
    at most the first 2000 characters of the cleaned text.
    """
    # Drop script/style elements first so their contents never leak into the text.
    no_scripts = re.sub(r'<script[^>]*>.*?</script>', '', html, flags=re.DOTALL | re.IGNORECASE)
    no_styles = re.sub(r'<style[^>]*>.*?</style>', '', no_scripts, flags=re.DOTALL | re.IGNORECASE)
    # Replace every remaining tag with a space so adjacent words don't fuse.
    tagless = re.sub(r'<[^>]+>', ' ', no_styles)
    # Collapse all whitespace runs and trim the edges.
    cleaned = re.sub(r'\s+', ' ', tagless).strip()
    return cleaned[:2000]  # cap at the first 2000 chars of clean text
# Analyze content with Haiku via gateway
def analyze_content(url, title, content, link_type):
    """Analyze a fetched page with the AI gateway and build an intelligent summary.

    Sends the cleaned page text to the local gateway for a 2-3 sentence
    summary.  Falls back to a local heuristic summary when the gateway
    errors out, times out, or anything else goes wrong.

    Args:
        url: The page URL (used only in the prompt and logs).
        title: The page title from the fetch step.
        content: Raw HTML of the page.
        link_type: Detected link category, e.g. "GitHub", "YouTube", "arXiv".

    Returns:
        dict with keys "summary", "tag", and "relevance" (always "relevant").
    """
    logger.debug(f"  🤖 Analyzing content: {url}")
    logger.debug(f"   Content length: {len(content)} chars")
    logger.debug(f"   Link type: {link_type}")
    try:
        # Extract clean text for the prompt (and for the heuristic fallback).
        clean_text = extract_text_from_html(content)
        logger.debug(f"   Extracted {len(clean_text)} chars of clean text")
        # Build analysis prompt
        prompt = f"""Analyze this webpage and create a brief summary for Laurent.
**Title**: {title}
**URL**: {url}
**Link Type**: {link_type}
**Content** (first 1500 chars):
{clean_text[:1500]}
---
Create a 2-3 sentence summary that answers:
1. What is this page about?
2. Why would Laurent find it useful?
Keep it practical and concise. Do NOT include the URL or title in the summary.
"""
        # Call gateway with a simple POST
        logger.debug("   Sending to gateway for analysis...")
        response = requests.post(
            "http://127.0.0.1:18789/sessions/turn",
            json={
                "message": prompt,
                "session": "main"
            },
            timeout=15,
            headers={"Authorization": f"Bearer {GATEWAY_TOKEN}"} if GATEWAY_TOKEN else {}
        )
        if response.status_code == 200:
            result = response.json()
            # The gateway reply may live under "message" or "content", and
            # may be a list of text parts — normalize to a single string.
            summary = result.get("message", "") or result.get("content", "")
            if isinstance(summary, list):
                summary = summary[0].get("text", "") if summary else ""
            summary = summary.strip()[:300]
            logger.info(f"   ✓ Got summary from gateway: {summary[:60]}")
            return {
                "summary": summary,
                # Single source of truth for the type→tag mapping (also
                # covers "Twitter/X", which the old inline chain missed).
                "tag": get_tag_from_type(link_type),
                "relevance": "relevant"
            }
        logger.warning(f"   Gateway error {response.status_code}, falling back to heuristic")
        # Fallback: use simple heuristic
        return {
            "summary": extract_simple_summary(clean_text, title, link_type),
            "tag": get_tag_from_type(link_type),
            "relevance": "relevant"
        }
    except requests.Timeout:
        logger.warning("   Gateway timeout, using fallback")
        # Use the cleaned text (not raw HTML) so the heuristic sees sentences,
        # matching the gateway-error fallback above.
        return {
            "summary": extract_simple_summary(extract_text_from_html(content), title, link_type),
            "tag": get_tag_from_type(link_type),
            "relevance": "relevant"
        }
    except Exception as e:
        logger.error(f"   Analysis error: {e}")
        import traceback
        logger.error(traceback.format_exc())
        # Return minimal analysis so the caller always gets a usable dict.
        return {
            "summary": title,
            "tag": "interesting",
            "relevance": "relevant"
        }
def extract_simple_summary(text, title, link_type):
    """Heuristic fallback summary: the first sentence of reasonable length.

    Splits *text* on sentence punctuation and returns the first fragment
    between 20 and 300 characters (truncated to 200).  Returns *title*
    when no such fragment exists.  *link_type* is accepted for interface
    parity with the AI path but is not used here.
    """
    for fragment in re.split(r'[.!?]', text):
        candidate = fragment.strip()
        if 20 < len(candidate) < 300:
            return candidate[:200]
    # Nothing sentence-like found — fall back to the page title.
    return title
def get_tag_from_type(link_type):
    """Return the inbox tag for a link type; unknown types get 'to-read'."""
    # Grouped by tag rather than listed per type — same mapping as before.
    for types, tag in (
        (("GitHub",), "project"),
        (("YouTube",), "video"),
        (("Reddit", "Twitter/X"), "discussion"),
        (("Medium", "Dev.to"), "article"),
        (("arXiv",), "learning"),
    ):
        if link_type in types:
            return tag
    return "to-read"
# Send to Tududi inbox
def add_to_tududi(title, url, link_type, summary="", tag=""):
"""Add to Tududi inbox with intelligent summary"""