Improve: Local heuristic-based analysis (no subprocess needed, fast)

This commit is contained in:

  bot.py — 143 changed lines
@@ -14,8 +14,6 @@ from pathlib import Path
 from dotenv import load_dotenv
 import logging
 from urllib.parse import urlparse
-import subprocess
-import sys

 # Load .env file
 load_dotenv()
@@ -159,79 +157,90 @@ def fetch_url_content(url):
         logger.error(f" ❌ Error: {e}")
         return {"title": "Fetch failed", "status": "error", "error": str(e), "content": ""}

-# Analyze content with AI (Haiku via gateway)
+# Analyze content (local heuristic-based)
def analyze_content(url, title, content, link_type):
    """Analyze content and suggest a summary + tag locally.

    Heuristic replacement for the previous sub-agent analysis: no subprocess,
    no network round-trip. A description is pulled out of the fetched HTML
    (meta description, og:description, or first paragraph, in that order) and
    the tag/summary are chosen from ``link_type``.

    Parameters:
        url: Link being analyzed (used for logging only).
        title: Page/link title; seeds the summary.
        content: Raw HTML (or text) previously fetched for the URL.
        link_type: Coarse source classification, e.g. "GitHub", "YouTube",
            "Reddit", "Medium", "Dev.to", "arXiv"; anything else is treated
            as a generic web article.

    Returns:
        dict with keys "summary" (str, <= 200 chars), "tag" (str) and
        "relevance" (str). Never raises: on any error a minimal analysis
        built from ``title`` is returned instead.
    """
    logger.debug(f" 🤖 Analyzing content: {url}")

    try:
        # Extract useful info from HTML content
        description = ""

        # Looking for meta description
        desc_match = re.search(r'<meta\s+name="description"\s+content="([^"]+)"', content, re.IGNORECASE)
        if desc_match:
            description = desc_match.group(1).strip()

        # Looking for og:description
        if not description:
            og_desc = re.search(r'<meta\s+property="og:description"\s+content="([^"]+)"', content, re.IGNORECASE)
            if og_desc:
                description = og_desc.group(1).strip()

        # Looking for first paragraph as a last resort
        if not description:
            p_match = re.search(r'<p[^>]*>([^<]+)</p>', content, re.IGNORECASE)
            if p_match:
                description = p_match.group(1).strip()[:200]

        # Determine tag based on content + URL + type
        tag = "interesting"
        summary = ""

        if link_type == "GitHub":
            tag = "project"
            summary = f"GitHub repository: {title}"
            # Try to extract more info from README
            readme_match = re.search(r'README[^<]*</h[1-3]>[^<]*<p[^>]*>([^<]+)', content, re.IGNORECASE)
            if readme_match:
                summary += f". {readme_match.group(1)[:100]}"

        elif link_type == "YouTube":
            tag = "video"
            summary = f"Video: {title}"
            if description:
                summary += f". {description[:80]}"

        elif link_type == "Reddit":
            tag = "discussion"
            summary = f"Reddit discussion: {title}"

        elif link_type == "Medium" or link_type == "Dev.to":
            tag = "article"
            summary = f"Article: {title}"
            if description:
                summary += f". {description[:80]}"

        elif link_type == "arXiv":
            tag = "learning"
            summary = f"Research paper: {title}"

        else:
            # Generic web article
            tag = "to-read"
            summary = title
            if description:
                summary += f". {description[:100]}"

        # Truncate summary to reasonable length
        summary = summary[:200]

        logger.debug(f" ✓ Tag: {tag}, Summary: {summary[:80]}")

        return {
            "summary": summary,
            "tag": tag,
            "relevance": "relevant"
        }

    except Exception as e:
        logger.error(f" Analysis error: {e}")
        # Return minimal analysis
        return {
            "summary": title,
            "tag": "interesting",
            "relevance": "relevant"
        }
|
||||
|
||||
# Send to Tududi inbox
|
||||
def add_to_tududi(title, url, link_type, summary="", tag=""):
|
||||
|
||||
Reference in New Issue
Block a user