Improve: Local heuristic-based analysis (no subprocess needed, fast)

This commit is contained in:
Remora
2026-02-09 19:05:31 +01:00
parent eaaa297d9e
commit e4421d7bc9

143
bot.py
View File

@@ -14,8 +14,6 @@ from pathlib import Path
from dotenv import load_dotenv
import logging
from urllib.parse import urlparse
import subprocess
import sys
# Load .env file
load_dotenv()
@@ -159,79 +157,90 @@ def fetch_url_content(url):
logger.error(f" ❌ Error: {e}")
return {"title": "Fetch failed", "status": "error", "error": str(e), "content": ""}
# Analyze content (local heuristic-based)
def analyze_content(url, title, content, link_type):
    """Analyze content and suggest summary + tag locally.

    No sub-process or remote model is involved: the summary and tag are
    derived from the link type, the title and a short description pulled
    out of the fetched HTML with regular expressions.

    Args:
        url: Address of the link; only used in the debug log line.
        title: Title of the page; forms the base of every summary.
        content: Fetched HTML of the page. ``None`` is treated as empty.
        link_type: Label of the link source. The values handled specially
            are "GitHub", "YouTube", "Reddit", "Medium", "Dev.to" and
            "arXiv"; anything else is treated as a generic web article.

    Returns:
        A dict with the keys ``"summary"`` (at most 200 characters),
        ``"tag"`` and ``"relevance"`` (always ``"relevant"``). If anything
        goes wrong the error is logged and a minimal result built from
        ``title`` with the tag ``"interesting"`` is returned instead.
    """
    logger.debug(f" 🤖 Analyzing content: {url}")

    try:
        # A missing body must not prevent the link-type based tagging below.
        content = content or ""

        # Extract useful info from HTML content
        description = ""

        # Looking for meta description
        desc_match = re.search(
            r'<meta\s+name="description"\s+content="([^"]+)"',
            content,
            re.IGNORECASE,
        )
        if desc_match:
            description = desc_match.group(1).strip()

        # Looking for og:description
        if not description:
            og_desc = re.search(
                r'<meta\s+property="og:description"\s+content="([^"]+)"',
                content,
                re.IGNORECASE,
            )
            if og_desc:
                description = og_desc.group(1).strip()

        # Looking for the first paragraph that contains plain text only
        if not description:
            p_match = re.search(r'<p[^>]*>([^<]+)</p>', content, re.IGNORECASE)
            if p_match:
                description = p_match.group(1).strip()[:200]

        # Determine tag based on content + URL + type
        tag = "interesting"
        summary = ""

        if link_type == "GitHub":
            tag = "project"
            summary = f"GitHub repository: {title}"
            # Try to extract more info from README
            readme_match = re.search(
                r'README[^<]*</h[1-3]>[^<]*<p[^>]*>([^<]+)',
                content,
                re.IGNORECASE,
            )
            if readme_match:
                summary += f". {readme_match.group(1)[:100]}"
        elif link_type == "YouTube":
            tag = "video"
            summary = f"Video: {title}"
            if description:
                summary += f". {description[:80]}"
        elif link_type == "Reddit":
            tag = "discussion"
            summary = f"Reddit discussion: {title}"
        elif link_type == "Medium" or link_type == "Dev.to":
            tag = "article"
            summary = f"Article: {title}"
            if description:
                summary += f". {description[:80]}"
        elif link_type == "arXiv":
            tag = "learning"
            summary = f"Research paper: {title}"
        else:
            # Generic web article
            tag = "to-read"
            summary = title
            if description:
                summary += f". {description[:100]}"

        # Truncate summary to reasonable length
        summary = summary[:200]

        logger.debug(f" ✓ Tag: {tag}, Summary: {summary[:80]}")

        return {
            "summary": summary,
            "tag": tag,
            "relevance": "relevant"
        }
    except Exception as e:
        # Deliberate best-effort: analysis must never block saving the link.
        logger.error(f" Analysis error: {e}")
        # Return minimal analysis
        return {
            "summary": title,
            "tag": "interesting",
            "relevance": "relevant"
        }
# Send to Tududi inbox
def add_to_tududi(title, url, link_type, summary="", tag=""):