# Analyze content (local heuristic-based)
def analyze_content(url, title, content, link_type):
    """Analyze a fetched page locally and suggest a summary + tag.

    NOTE(review): recovered from a corrupted patch — the HTML-matching regex
    literals had their ``<...>`` tag text stripped by a sanitizer and are
    reconstructed here from context; confirm them against the original bot.py.

    Args:
        url: Link being analyzed (used for logging only).
        title: Page title extracted by the fetcher.
        content: Raw HTML of the page (may be empty).
        link_type: Source category, e.g. "GitHub", "YouTube", "Reddit",
            "Medium", "Dev.to", "arXiv"; anything else is treated as a
            generic web article.

    Returns:
        dict with keys "summary" (truncated to 200 chars), "tag", and
        "relevance" (always "relevant"). Never raises: any internal error
        falls back to a minimal analysis built from the title.
    """
    # Function-local lookup keeps the block self-contained; resolves to the
    # same module logger configured at file top.
    logger = logging.getLogger(__name__)
    logger.debug(f" 🤖 Analyzing content: {url}")

    try:
        # Extract useful info from the HTML content.
        description = ""

        # Prefer the <meta name="description"> content, if present.
        # NOTE(review): reconstructed pattern — only the ']*>([^<]+)' tail
        # survived in the corrupted patch; verify against the original.
        desc_match = re.search(
            r'<meta[^>]*name=["\']description["\'][^>]*content=["\']([^"\'>]+)',
            content, re.IGNORECASE)
        if desc_match:
            description = desc_match.group(1).strip()[:200]

        # Fall back to the first paragraph's text.
        if not description:
            p_match = re.search(r'<p[^>]*>([^<]+)</p>', content, re.IGNORECASE)
            if p_match:
                description = p_match.group(1).strip()[:200]

        # Determine tag + summary from content, URL type and description.
        tag = "interesting"
        summary = ""

        if link_type == "GitHub":
            tag = "project"
            summary = f"GitHub repository: {title}"
            # Try to pull the first paragraph after the README heading.
            # NOTE(review): reconstructed pattern — original tags were eaten.
            readme_match = re.search(r'README[^<]*<p[^>]*>([^<]+)',
                                     content, re.IGNORECASE)
            if readme_match:
                summary += f". {readme_match.group(1)[:100]}"

        elif link_type == "YouTube":
            tag = "video"
            summary = f"Video: {title}"
            if description:
                summary += f". {description[:80]}"

        elif link_type == "Reddit":
            tag = "discussion"
            summary = f"Reddit discussion: {title}"

        elif link_type in ("Medium", "Dev.to"):
            tag = "article"
            summary = f"Article: {title}"
            if description:
                summary += f". {description[:80]}"

        elif link_type == "arXiv":
            tag = "learning"
            summary = f"Research paper: {title}"

        else:
            # Generic web article
            tag = "to-read"
            summary = title
            if description:
                summary += f". {description[:100]}"

        # Truncate summary to reasonable length
        summary = summary[:200]

        logger.debug(f" ✓ Tag: {tag}, Summary: {summary[:80]}")

        return {
            "summary": summary,
            "tag": tag,
            "relevance": "relevant",
        }

    except Exception as e:
        logger.error(f" Analysis error: {e}")
        # Best-effort fallback: never let analysis failure break the pipeline.
        return {
            "summary": title,
            "tag": "interesting",
            "relevance": "relevant",
        }