From 23548be6adc67660c0038cc5843b19af6290423d Mon Sep 17 00:00:00 2001 From: Remora Date: Mon, 9 Feb 2026 18:07:14 +0100 Subject: [PATCH] Initial: Link analyzer bot for #remora channel --- .env.example | 10 +++ .gitignore | 7 ++ README.md | 116 +++++++++++++++++++++++++++++ analyze-links.sh | 66 ++++++++++++++++ bot.py | 190 +++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 389 insertions(+) create mode 100644 .env.example create mode 100644 .gitignore create mode 100644 README.md create mode 100755 analyze-links.sh create mode 100644 bot.py diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..779f804 --- /dev/null +++ b/.env.example @@ -0,0 +1,10 @@ +# Discord Bot Token (from https://discord.com/developers/applications) +DISCORD_BOT_TOKEN=your_token_here + +# Tududi API +TUDUDI_API_URL=https://todo.dilain.com/api/v1 +TUDUDI_API_KEY=tt_your_key_here + +# OpenClaw Gateway (optional, for future AI analysis) +OPENCLAW_GATEWAY=http://127.0.0.1:18789 +OPENCLAW_GATEWAY_TOKEN=your_gateway_token diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3a51f74 --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +.env +.env.local +*.pyc +__pycache__/ +.DS_Store +tracker.json +venv/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..7b06153 --- /dev/null +++ b/README.md @@ -0,0 +1,116 @@ +# Link Analyzer Bot - #remora + +Analyzes links posted in the #remora channel in real time. Fetches content, creates summaries, and adds them to the Tududi inbox. + +## Setup + +### 1. Install dependencies + +```bash +pip install discord.py requests python-dotenv +``` + +### 2. Get Discord Bot Token + +If you don't have a bot token: +1. Go to https://discord.com/developers/applications +2. Click "New Application" +3. Name it "Link Analyzer" (or whatever) +4. Go to "Bot" tab → "Add Bot" +5. Copy the token and enable "Message Content Intent" on the same tab (bot.py requests it) +6. Under OAuth2 → Scopes, select: `bot` +7. Under Permissions, select: `Read Messages/View Channels`, `Send Messages`, `Read Message History` +8. 
Go to the generated URL and add the bot to your server + +### 3. Set environment variables + +```bash +export DISCORD_BOT_TOKEN="your_token_here" +export TUDUDI_API_URL="https://todo.dilain.com/api/v1" +export TUDUDI_API_KEY="tt_your_key_here" +export OPENCLAW_GATEWAY="http://127.0.0.1:18789" +export OPENCLAW_GATEWAY_TOKEN="your_gateway_token" +``` + +### 4. Run bot + +```bash +python3 bot.py +``` + +You should see: +``` +✅ Bot logged in as LinkAnalyzer#1234 +📍 Watching channel #remora (1467557082583535729) +``` + +## What it does + +1. **Real-time monitoring** - Listens to all messages in #remora +2. **Link detection** - Extracts URLs from messages +3. **Content fetching** - Downloads and analyzes page content +4. **Type detection** - GitHub, Reddit, YouTube, TikTok, etc. +5. **Summary response** - Replies with format: `📌 **Type**: Title` +6. **Tududi integration** - Adds to inbox: `📌 Type: Title\n🔗 URL` +7. **History tracking** - Saves to `tracker.json` (all processed links) + +## Files + +- `bot.py` - Main Discord bot +- `tracker.json` - History of all processed links +- `analyze-links.sh` - Old cron version (deprecated) + +## Tracking + +All links are saved in `tracker.json`: +```json +{ + "links": [ + { + "url": "https://...", + "title": "Article Title", + "type": "GitHub", + "author": "username", + "message_id": 123456, + "date": "2026-02-09T18:05:00", + "tududi": true + } + ] +} +``` + +## Running as service + +To run permanently (e.g., on a VPS): + +```bash +# Using screen +screen -S link-bot +python3 bot.py + +# Or systemd service +# TODO: Add systemd unit file +``` + +## Troubleshooting + +**Bot not seeing messages:** +- Check DISCORD_BOT_TOKEN is correct +- Verify bot has "Read Message History" permission +- Make sure bot is in the server + +**Can't fetch links:** +- Some sites block scrapers → error logged in response +- Requests time out after 5 seconds + +**Tududi not getting items:** +- Check 
TUDUDI_API_KEY is set +- Verify API endpoint is reachable + +## Future enhancements + +- [ ] Summarization with AI (use Haiku analysis) +- [ ] Tag suggestions based on content +- [ ] React with 👀 when processing +- [ ] Edit summary if analysis completes +- [ ] Support for media attachments diff --git a/analyze-links.sh b/analyze-links.sh new file mode 100755 index 0000000..aa06d81 --- /dev/null +++ b/analyze-links.sh @@ -0,0 +1,66 @@ +#!/bin/bash +# Scan #remora for new links, analyze, and add to Tududi inbox +# Run via cron every 5 minutes + +set -euo pipefail + +CHANNEL_ID="1467557082583535729" +TRACKER_FILE="$(dirname "$0")/tracker.json" +GATEWAY_URL="${OPENCLAW_GATEWAY:-http://127.0.0.1:18789}" +GATEWAY_TOKEN="${OPENCLAW_GATEWAY_TOKEN:-}" + +if [ -z "$GATEWAY_TOKEN" ]; then + echo "āš ļø OPENCLAW_GATEWAY_TOKEN not set" + exit 1 +fi + +# Read last check timestamp +last_check=$(jq -r '.last_check' "$TRACKER_FILE") +processed_ids=$(jq -r '.processed_message_ids[]' "$TRACKER_FILE" 2>/dev/null || echo "") + +# Fetch recent messages from #remora +echo "šŸ“” Scanning #remora for new links..." 
+messages=$(curl -s \ + -H "Authorization: Bearer ${DISCORD_BOT_TOKEN:-}" \ + "https://discord.com/api/v10/channels/$CHANNEL_ID/messages?limit=50" 2>/dev/null || echo "[]") + +# Extract URLs and process new ones +echo "$messages" | jq -r '.[] | select(.content | test("http")) | "\(.id)|\(.content)|\(.author.username)"' | while IFS='|' read -r msg_id content author; do + # Skip if already processed + if echo "$processed_ids" | grep -q "$msg_id"; then + continue + fi + + echo "šŸ”— Found message from $author: $content" + + # Extract URL (simple regex) + url=$(echo "$content" | grep -oP 'https?://[^\s]+' | head -1) + + if [ -z "$url" ]; then + continue + fi + + echo " URL: $url" + + # Fetch and analyze + title=$(curl -s -I "$url" 2>/dev/null | grep -i "title" | cut -d' ' -f2- || echo "Unknown") + + # Send to Tududi inbox with summary + if [ -n "$title" ]; then + echo " āž• Adding to Tududi inbox: $title" + curl -s -X POST "https://todo.dilain.com/api/v1/inbox" \ + -H "Authorization: Bearer ${TUDUDI_API_KEY:-}" \ + -H "Content-Type: application/json" \ + -d "{\"content\":\"šŸ“Œ $title\nšŸ”— $url\"}" > /dev/null 2>&1 + fi + + # Update tracker + jq ".processed_message_ids += [\"$msg_id\"]" "$TRACKER_FILE" > "$TRACKER_FILE.tmp" + mv "$TRACKER_FILE.tmp" "$TRACKER_FILE" +done + +# Update last check +jq ".last_check = \"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"" "$TRACKER_FILE" > "$TRACKER_FILE.tmp" +mv "$TRACKER_FILE.tmp" "$TRACKER_FILE" + +echo "āœ… Scan complete" diff --git a/bot.py b/bot.py new file mode 100644 index 0000000..0399a90 --- /dev/null +++ b/bot.py @@ -0,0 +1,190 @@ +#!/usr/bin/env python3 +""" +Discord bot for #remora channel - analyzes links in real-time +Posts summaries, adds to Tududi inbox, maintains JSON history +""" + +import discord +import os +import json +import re +import requests +from datetime import datetime +from pathlib import Path +from dotenv import load_dotenv + +# Load .env file +load_dotenv() + +# Config +CHANNEL_ID = 1467557082583535729 
TRACKER_FILE = Path(__file__).parent / "tracker.json"
TUDUDI_API_URL = os.getenv("TUDUDI_API_URL", "https://todo.dilain.com/api/v1")
TUDUDI_API_KEY = os.getenv("TUDUDI_API_KEY")
GATEWAY_URL = os.getenv("OPENCLAW_GATEWAY", "http://127.0.0.1:18789")
GATEWAY_TOKEN = os.getenv("OPENCLAW_GATEWAY_TOKEN")

# Compiled once: both patterns are used for every processed message.
_URL_RE = re.compile(r'https?://[^\s<>"{}|\\^`\[\]]+')
# The opening "<title" is required; without it the pattern matches the first
# ">" anywhere in the page instead of the document title.
_TITLE_RE = re.compile(r'<title[^>]*>([^<]+)', re.IGNORECASE)

# Sentence punctuation that commonly trails a URL pasted into chat.
_TRAILING_PUNCT = ".,;:!?'"

# Label -> registrable domains. Matching is done on the parsed hostname so
# that e.g. "netflix.com" is not classified as "x.com".
_LINK_TYPES = (
    ("GitHub", ("github.com",)),
    ("Reddit", ("reddit.com",)),
    ("YouTube", ("youtube.com", "youtu.be")),
    ("TikTok", ("tiktok.com",)),
    ("Twitter/X", ("twitter.com", "x.com")),
)


# Load or init tracker
def load_tracker() -> dict:
    """Return the tracker state stored in TRACKER_FILE.

    When the file does not exist a fresh structure is returned. Keys missing
    from an existing file are filled with empty defaults, so a tracker file
    that lacks "links" or "processed_message_ids" does not cause a KeyError
    in the caller.

    Raises:
        json.JSONDecodeError: if the file exists but is not valid JSON.
    """
    tracker = {}
    if TRACKER_FILE.exists():
        with open(TRACKER_FILE, encoding="utf-8") as f:
            tracker = json.load(f)
    tracker.setdefault("channel_id", CHANNEL_ID)
    tracker.setdefault("processed_message_ids", [])
    tracker.setdefault("links", [])
    return tracker


def save_tracker(data: dict) -> None:
    """Write *data* to TRACKER_FILE as indented JSON.

    The content is written to a sibling temporary file first and then moved
    into place with os.replace, so an interrupted write cannot leave a
    truncated tracker behind.
    """
    tmp_path = TRACKER_FILE.with_name(TRACKER_FILE.name + ".tmp")
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2)
    os.replace(tmp_path, TRACKER_FILE)


# Detect links in text
def extract_urls(text: str) -> list:
    """Return the distinct http(s) URLs found in *text*, in order of appearance.

    Trailing sentence punctuation and an unbalanced closing parenthesis are
    stripped, so "see https://example.com/page." yields
    "https://example.com/page". Empty or None input yields an empty list.
    """
    found = {}
    for raw in _URL_RE.findall(text or ""):
        url = raw.rstrip(_TRAILING_PUNCT)
        # "(see https://example.com/a)" -> drop the ")" that closes the prose,
        # but keep balanced ones such as ".../Foo_(bar)".
        while url.endswith(")") and url.count("(") < url.count(")"):
            url = url[:-1].rstrip(_TRAILING_PUNCT)
        # Skip matches that are nothing but a scheme after stripping.
        if url.split("://", 1)[-1]:
            found.setdefault(url, None)
    return list(found)


def _detect_link_type(url: str) -> str:
    """Classify *url* by its hostname; returns "webpage" when nothing matches."""
    from urllib.parse import urlparse  # local: not part of the file's import block

    host = (urlparse(url).hostname or "").lower()
    for label, domains in _LINK_TYPES:
        if any(host == d or host.endswith("." + d) for d in domains):
            return label
    return "webpage"


# Fetch and analyze URL
def analyze_url(url: str) -> dict:
    """Fetch *url* and build a short summary of it.

    Returns:
        On success: {"title", "type", "status": "ok"}. The title comes from
        the page's <title> element (searched in the first 2000 characters of
        the response); if none is found, the last path segment of the URL is
        used, or the URL itself when that segment is empty.
        On any failure: {"title": "Couldn't fetch", "type": "unknown",
        "status": "error", "error": <message>}.

    This function performs blocking network I/O (5 second timeout).
    """
    import html  # local: not part of the file's import block

    try:
        print(f" 📥 Fetching: {url}")
        response = requests.get(url, timeout=5, headers={
            'User-Agent': 'Mozilla/5.0'
        })
        content = response.text[:2000]  # First 2k chars

        # Extract title, decoding entities and collapsing whitespace/newlines.
        title = ""
        title_match = _TITLE_RE.search(content)
        if title_match:
            title = " ".join(html.unescape(title_match.group(1)).split())
        if not title:
            # URLs ending in "/" have an empty last segment; never return "".
            title = url.rstrip("/").split("/")[-1] or url

        return {
            "title": title,
            "type": _detect_link_type(url),
            "status": "ok"
        }
    except Exception as e:
        # Deliberately broad: any failure to fetch or parse one link is
        # reported back to the channel instead of crashing the handler.
        print(f" ❌ Error fetching: {e}")
        return {
            "title": "Couldn't fetch",
            "type": "unknown",
            "status": "error",
            "error": str(e)
        }


# Send to Tududi inbox
def add_to_tududi(title: str, url: str, link_type: str) -> bool:
    """POST a "type: title / url" note to the Tududi inbox endpoint.

    Returns:
        True when the API answers with a 2xx status; False when
        TUDUDI_API_KEY is unset, the API answers with another status, or the
        request fails.

    This function performs blocking network I/O (5 second timeout).
    """
    try:
        if not TUDUDI_API_KEY:
            print(" ⚠️ TUDUDI_API_KEY not set")
            return False

        content = f"📌 {link_type}: {title}\n🔗 {url}"

        response = requests.post(
            f"{TUDUDI_API_URL}/inbox",
            headers={
                "Authorization": f"Bearer {TUDUDI_API_KEY}",
                "Content-Type": "application/json"
            },
            json={"content": content},
            timeout=5
        )

        # Accept any 2xx: a create endpoint may answer 201 rather than 200.
        if 200 <= response.status_code < 300:
            print(f" ✅ Added to Tududi: {title}")
            return True
        print(f" ⚠️ Tududi error: {response.status_code}")
        return False
    except Exception as e:
        print(f" ❌ Tududi error: {e}")
        return False


# Discord bot
intents = discord.Intents.default()
# Needed to read message text; must also be enabled for the bot in the
# Discord developer portal.
intents.message_content = True


class LinkAnalyzerBot(discord.Client):
    """Discord client that summarises links posted in the watched channel."""

    async def on_ready(self):
        """Log the account the bot is logged in as and the watched channel."""
        print(f"✅ Bot logged in as {self.user}")
        print(f"📍 Watching channel #remora ({CHANNEL_ID})")

    async def on_message(self, message):
        """Analyze every URL in *message*, reply with a summary, record it.

        Messages from the bot itself, from other channels, without URLs, or
        whose id is already in the tracker are ignored.
        """
        import asyncio  # local: not part of the file's import block

        # Ignore bot's own messages
        if message.author == self.user:
            return

        # Only process #remora channel
        if message.channel.id != CHANNEL_ID:
            return

        # Check for URLs
        urls = extract_urls(message.content)
        if not urls:
            return

        # Skip if already processed
        if message.id in load_tracker()["processed_message_ids"]:
            return

        print(f"🔗 New link from {message.author}: {message.content}")

        loop = asyncio.get_running_loop()
        entries = []

        # Process each URL
        for url in urls:
            print(f" Processing: {url}")

            # requests is blocking; run it in the default executor so the
            # event loop (and the gateway heartbeat) keeps running.
            analysis = await loop.run_in_executor(None, analyze_url, url)
            added = await loop.run_in_executor(
                None, add_to_tududi, analysis["title"], url, analysis["type"]
            )

            # Prepare response
            summary = f"📌 **{analysis['type']}**: {analysis['title']}"
            if analysis["status"] == "error":
                summary += f"\n⚠️ {analysis['error']}"

            # Post summary in channel. The title is scraped from an arbitrary
            # page, so mentions are disabled and the text is capped at
            # Discord's 2000-character message limit.
            await message.reply(
                summary[:2000],
                mention_author=False,
                allowed_mentions=discord.AllowedMentions.none(),
            )

            entries.append({
                "url": url,
                "title": analysis["title"],
                "type": analysis["type"],
                "author": str(message.author),
                "message_id": message.id,
                "date": datetime.now().isoformat(),
                # Record what actually happened, not an unconditional True.
                "tududi": added
            })

        # Reload right before writing: other handlers may have saved while
        # this one was awaiting, and there is no await between load and save.
        tracker = load_tracker()
        tracker["links"].extend(entries)
        tracker["processed_message_ids"].append(message.id)
        save_tracker(tracker)


# Main
if __name__ == "__main__":
    token = os.getenv("DISCORD_BOT_TOKEN")
    if not token:
        print("❌ DISCORD_BOT_TOKEN not set!")
        print("Set it: export DISCORD_BOT_TOKEN='your_token'")
        # exit() is only injected by the site module; SystemExit always works.
        raise SystemExit(1)

    bot = LinkAnalyzerBot(intents=intents)
    bot.run(token)