Feature: Real AI-powered summaries via gateway (explains content + utility)
This commit is contained in:
166
bot.py
166
bot.py
@@ -157,99 +157,137 @@ def fetch_url_content(url):
|
|||||||
logger.error(f" ❌ Error: {e}")
|
logger.error(f" ❌ Error: {e}")
|
||||||
return {"title": "Fetch failed", "status": "error", "error": str(e), "content": ""}
|
return {"title": "Fetch failed", "status": "error", "error": str(e), "content": ""}
|
||||||
|
|
||||||
# Extract clean text from HTML
def extract_text_from_html(html):
    """Extract readable plain text from an HTML document.

    Strips <script>/<style> blocks wholesale, replaces the remaining
    tags with spaces, collapses whitespace, and decodes HTML entities
    (&amp;, &lt;, ...) so the text reads naturally.

    Args:
        html: Raw HTML string.

    Returns:
        Up to the first 2000 characters of the cleaned text.
    """
    # Aliased import: the parameter is named `html`, which would shadow
    # the stdlib module name inside this function.
    import html as _htmllib

    # Remove scripts and styles — their text is not page content.
    text = re.sub(r'<script[^>]*>.*?</script>', '', html, flags=re.DOTALL | re.IGNORECASE)
    text = re.sub(r'<style[^>]*>.*?</style>', '', text, flags=re.DOTALL | re.IGNORECASE)
    # Replace remaining tags with spaces so adjacent words don't merge.
    text = re.sub(r'<[^>]+>', ' ', text)
    # Collapse whitespace runs into single spaces.
    text = re.sub(r'\s+', ' ', text)
    text = text.strip()
    # Decode entities AFTER tag removal so an encoded "&lt;script&gt;"
    # in the text can never turn back into a real tag before stripping.
    text = _htmllib.unescape(text)
    return text[:2000]  # First 2000 chars of clean text
# Analyze content with Haiku via gateway
def analyze_content(url, title, content, link_type):
    """Analyze page content with AI to create an intelligent summary.

    Sends the cleaned page text to the local gateway (Haiku) and asks for
    a short 2-3 sentence summary. Falls back to the local heuristic
    (`extract_simple_summary` + `get_tag_from_type`) when the gateway
    errors, times out, or returns an empty answer.

    Args:
        url: Page URL (used only in the prompt and logs).
        title: Page title from the fetch step.
        content: Raw HTML of the page.
        link_type: Detected source type ("GitHub", "YouTube", ...).

    Returns:
        dict with keys "summary", "tag" and "relevance".
    """
    logger.debug(f" 🤖 Analyzing content: {url}")
    logger.debug(f" Content length: {len(content)} chars")
    logger.debug(f" Link type: {link_type}")

    try:
        # Extract clean text
        clean_text = extract_text_from_html(content)
        logger.debug(f" Extracted {len(clean_text)} chars of clean text")

        # Build analysis prompt
        prompt = f"""Analyze this webpage and create a brief summary for Laurent.

**Title**: {title}
**URL**: {url}
**Link Type**: {link_type}

**Content** (first 1500 chars):
{clean_text[:1500]}

---

Create a 2-3 sentence summary that answers:
1. What is this page about?
2. Why would Laurent find it useful?

Keep it practical and concise. Do NOT include the URL or title in the summary.
"""

        # Call gateway with a simple POST
        logger.debug(" Sending to gateway for analysis...")
        response = requests.post(
            "http://127.0.0.1:18789/sessions/turn",
            json={
                "message": prompt,
                "session": "main"
            },
            timeout=15,
            headers={"Authorization": f"Bearer {GATEWAY_TOKEN}"} if GATEWAY_TOKEN else {}
        )

        if response.status_code == 200:
            result = response.json()
            # Extract the summary: the gateway may answer under "message"
            # or "content", as a plain string or a list of {"text": ...}
            # parts — TODO confirm against gateway response schema.
            summary = result.get("message", "") or result.get("content", "")
            if isinstance(summary, list):
                summary = summary[0].get("text", "") if summary else ""
            summary = summary.strip()[:300]

            if summary:
                logger.info(f" ✓ Got summary from gateway: {summary[:60]}")
                return {
                    "summary": summary,
                    # Single source of truth for the type→tag mapping
                    # (also covers "Twitter/X", which an inline chain
                    # would miss).
                    "tag": get_tag_from_type(link_type),
                    "relevance": "relevant"
                }
            # A 200 with an empty body is still a failure: fall through
            # to the heuristic instead of returning a blank summary.
            logger.warning(" Gateway returned empty summary, falling back to heuristic")
        else:
            logger.warning(f" Gateway error {response.status_code}, falling back to heuristic")

        # Fallback: use simple heuristic
        return {
            "summary": extract_simple_summary(clean_text, title, link_type),
            "tag": get_tag_from_type(link_type),
            "relevance": "relevant"
        }

    except requests.Timeout:
        logger.warning(" Gateway timeout, using fallback")
        # clean_text is always bound before the POST can time out; feeding
        # clean text (not raw HTML) keeps this fallback consistent with
        # the gateway-error fallback above.
        return {
            "summary": extract_simple_summary(clean_text, title, link_type),
            "tag": get_tag_from_type(link_type),
            "relevance": "relevant"
        }

    except Exception as e:
        logger.error(f" Analysis error: {e}")
        import traceback
        logger.error(traceback.format_exc())
        # Return minimal analysis so the caller always gets a usable dict.
        return {
            "summary": title,
            "tag": "interesting",
            "relevance": "relevant"
        }
def extract_simple_summary(text, title, link_type):
    """Fallback: derive a simple one-line summary from raw text.

    Splits *text* into sentence-ish fragments (on '.', '!' or '?') and
    returns the first fragment whose stripped length is strictly between
    20 and 300 characters, truncated to 200 chars. Returns *title* when
    no fragment qualifies. *link_type* is accepted for interface parity
    with the AI path but is not consulted here.
    """
    for fragment in re.split(r'[.!?]', text):
        fragment = fragment.strip()
        if 20 < len(fragment) < 300:
            return fragment[:200]
    return title
def get_tag_from_type(link_type):
    """Map a detected link type to its default tag.

    Unknown types fall through to the generic "to-read" tag.
    """
    if link_type == "GitHub":
        return "project"
    if link_type == "YouTube":
        return "video"
    if link_type in ("Reddit", "Twitter/X"):
        return "discussion"
    if link_type in ("Medium", "Dev.to"):
        return "article"
    if link_type == "arXiv":
        return "learning"
    return "to-read"
||||||
# Send to Tududi inbox
|
# Send to Tududi inbox
|
||||||
def add_to_tududi(title, url, link_type, summary="", tag=""):
|
def add_to_tududi(title, url, link_type, summary="", tag=""):
|
||||||
"""Add to Tududi inbox with intelligent summary"""
|
"""Add to Tududi inbox with intelligent summary"""
|
||||||
|
|||||||
Reference in New Issue
Block a user