From e4421d7bc978a3b798b0729c91e0b5d33f299149 Mon Sep 17 00:00:00 2001
From: Remora
Date: Mon, 9 Feb 2026 19:05:31 +0100
Subject: [PATCH] Improve: Local heuristic-based analysis (no subprocess
needed, fast)
---
bot.py | 143 ++++++++++++++++++++++++++++++---------------------------
1 file changed, 76 insertions(+), 67 deletions(-)
diff --git a/bot.py b/bot.py
index 0d711e9..16df6fa 100644
--- a/bot.py
+++ b/bot.py
@@ -14,8 +14,6 @@ from pathlib import Path
from dotenv import load_dotenv
import logging
from urllib.parse import urlparse
-import subprocess
-import sys
# Load .env file
load_dotenv()
@@ -159,79 +157,90 @@ def fetch_url_content(url):
logger.error(f" ❌ Error: {e}")
return {"title": "Fetch failed", "status": "error", "error": str(e), "content": ""}
-# Analyze content with AI (Haiku via gateway)
+# Analyze content (local heuristic-based)
def analyze_content(url, title, content, link_type):
- """Analyze content and create intelligent summary"""
+ """Analyze content and suggest summary + tag locally"""
logger.debug(f" 🤖 Analyzing content: {url}")
- # Build analysis prompt
- analysis_prompt = f"""Analyze this link and create a brief summary useful for Laurent.
-
-**Link**: {link_type} - {title}
-**URL**: {url}
-
-**Content (first 1500 chars)**:
-{content[:1500]}
-
----
-
-Respond in JSON format ONLY (no markdown, no explanation):
-{{
- "summary": "1-2 sentences max: What is it? Why would Laurent find it useful?",
- "tag": "one of: to-read, tool, inspiration, learning, reference, interesting, project, tutorial, article, code, security",
- "relevance": "very-relevant OR relevant OR nice-to-have"
-}}
-
-Be concise and practical."""
-
try:
- # Use OpenClaw CLI to invoke sessions_spawn
- # This spawns a sub-agent that analyzes the content
- result = subprocess.run(
- [
- sys.executable, "-m", "openclaw",
- "sessions", "spawn",
- "--task", analysis_prompt,
- "--model", "openrouter/anthropic/claude-haiku-4.5",
- "--thinking", "off",
- "--timeout", "15"
- ],
- capture_output=True,
- text=True,
- timeout=20
- )
+ # Extract useful info from HTML content
+ description = ""
+
+ # Looking for meta description
+    desc_match = re.search(r'<meta[^>]*name=["\']description["\'][^>]*content=["\']([^"\']+)["\']', content, re.IGNORECASE)
+    if desc_match:
+        description = desc_match.group(1).strip()[:200]
+
+    # Fallback: first paragraph of the page body
+    p_match = re.search(r'<p[^>]*>([^<]+)</p>', content, re.IGNORECASE)
+    if not description and p_match:
+        description = p_match.group(1).strip()[:200]
+
+ # Determine tag based on content + URL + type
+ tag = "interesting"
+ summary = ""
+
+ if link_type == "GitHub":
+ tag = "project"
+ summary = f"GitHub repository: {title}"
+ # Try to extract more info from README
+            readme_match = re.search(r'README[^<]*</[^>]+>\s*<p[^>]*>([^<]+)</p>', content, re.IGNORECASE)
+ if readme_match:
+ summary += f". {readme_match.group(1)[:100]}"
+
+ elif link_type == "YouTube":
+ tag = "video"
+ summary = f"Video: {title}"
+ if description:
+ summary += f". {description[:80]}"
+
+ elif link_type == "Reddit":
+ tag = "discussion"
+ summary = f"Reddit discussion: {title}"
+
+ elif link_type == "Medium" or link_type == "Dev.to":
+ tag = "article"
+ summary = f"Article: {title}"
+ if description:
+ summary += f". {description[:80]}"
+
+ elif link_type == "arXiv":
+ tag = "learning"
+ summary = f"Research paper: {title}"
- if result.returncode == 0:
- output = result.stdout
- logger.debug(f" Sub-agent response: {output[:200]}")
-
- # Try to parse JSON
- try:
- json_match = re.search(r'\{[^{}]*"summary"[^{}]*\}', output, re.DOTALL)
- if json_match:
- analysis_data = json.loads(json_match.group())
- logger.debug(f" ✓ Analysis parsed successfully")
- return analysis_data
- except json.JSONDecodeError:
- pass
-
- # Fallback: extract summary from text
- summary_line = output.split('\n')[0][:200]
- return {
- "summary": summary_line,
- "tag": "interesting",
- "relevance": "relevant"
- }
else:
- logger.warning(f" Sub-agent error: {result.stderr[:200]}")
- return None
+ # Generic web article
+ tag = "to-read"
+ summary = title
+ if description:
+ summary += f". {description[:100]}"
+
+ # Truncate summary to reasonable length
+ summary = summary[:200]
+
+ logger.debug(f" ✓ Tag: {tag}, Summary: {summary[:80]}")
+
+ return {
+ "summary": summary,
+ "tag": tag,
+ "relevance": "relevant"
+ }
- except subprocess.TimeoutExpired:
- logger.warning(f" Analysis timeout")
- return None
except Exception as e:
- logger.warning(f" Analysis error: {e}")
- return None
+ logger.error(f" Analysis error: {e}")
+ # Return minimal analysis
+ return {
+ "summary": title,
+ "tag": "interesting",
+ "relevance": "relevant"
+ }
# Send to Tududi inbox
def add_to_tududi(title, url, link_type, summary="", tag=""):