"""OpenRouter helpers: connection check and LLM enrichment of blog merge pairs."""

import json
import time
from typing import Callable, Optional

import requests

OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"

# Human-readable label -> OpenRouter model identifier.
MODEL_OPTIONS = {
    "🟣 Claude 3.5 Haiku (Fast & Cheap)": "anthropic/claude-3-5-haiku",
    "🟣 Claude 3.5 Sonnet (Most Capable)": "anthropic/claude-3-5-sonnet",
    "🔵 GPT-4o Mini (OpenAI - Fast)": "openai/gpt-4o-mini",
    "🔵 GPT-4o (OpenAI - Powerful)": "openai/gpt-4o",
    "🔴 Gemini Flash 1.5 (Google - Fast)": "google/gemini-flash-1.5",
    "🔴 Gemini Pro 1.5 (Google - Powerful)": "google/gemini-pro-1.5",
    "⚫ Grok 2 (xAI)": "x-ai/grok-2-1212",
    "🟡 DeepSeek V3 (Fast & Cheap)": "deepseek/deepseek-chat",
    "🟡 DeepSeek R1 (Reasoning Model)": "deepseek/deepseek-r1",
}


def test_connection(api_key: str, model_id: str) -> tuple[bool, str]:
    """Ping the API with a minimal request.

    Args:
        api_key: OpenRouter API key sent as a bearer token.
        model_id: Model identifier to address (see ``MODEL_OPTIONS`` values).

    Returns:
        ``(success, message)``. The function does not raise: transport
        failures and non-200 responses are both reported through the tuple.
    """
    try:
        resp = requests.post(
            OPENROUTER_URL,
            headers=_headers(api_key),
            json={
                "model": model_id,
                "messages": [{"role": "user", "content": "Reply OK"}],
                "max_tokens": 5,
            },
            timeout=15,
        )
    except Exception as e:
        # Boundary handler: callers expect a (bool, str) result, so any
        # failure to complete the request is reported rather than raised.
        return False, f"Connection failed: {str(e)}"

    if resp.status_code == 200:
        return True, "Connection successful."
    return False, f"API error {resp.status_code}: {_error_message(resp)}"


def _error_message(resp) -> str:
    """Extract a readable error message from a non-200 response.

    Falls back to the first 200 characters of the raw body when the body is
    not JSON or does not carry an ``error`` entry, so an HTTP error is never
    misreported as a connection failure.
    """
    fallback = resp.text[:200]
    try:
        payload = resp.json()
    except ValueError:  # body is not JSON (e.g. an HTML gateway error page)
        return fallback

    error = payload.get("error") if isinstance(payload, dict) else None
    if isinstance(error, dict):
        return str(error.get("message", fallback))
    if error:
        return str(error)
    return fallback


def enrich_merge_pairs(
    pairs: list[dict],
    api_key: str,
    model_id: str,
    batch_size: int = 10,
    progress_callback: Optional[Callable[[int, int], None]] = None,
) -> list[dict]:
    """Send merge pairs to the LLM in batches and annotate each pair.

    Fills in ``topic_cluster``, ``merge_reason`` and ``should_merge`` on every
    pair. The dicts in ``pairs`` are modified in place.

    Args:
        pairs: Pair dicts. Each must provide the keys used to build the
            prompt: ``weak_title``, ``weak_clicks``, ``strong_title``,
            ``strong_clicks`` and ``similarity``.
        api_key: OpenRouter API key.
        model_id: Model identifier to query.
        batch_size: Number of pairs per LLM request; must be at least 1.
        progress_callback: Optional ``progress_callback(current, total)``,
            called after each batch.

    Returns:
        The pairs whose ``should_merge`` is truthy. Pairs from a failed batch
        are kept with placeholder text so they can be reviewed manually.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        # A negative step would silently yield no batches and drop all pairs.
        raise ValueError("batch_size must be a positive integer")

    total = len(pairs)
    batches = [pairs[i:i + batch_size] for i in range(0, total, batch_size)]
    results = []

    for b_idx, batch in enumerate(batches):
        try:
            llm_data = _call_llm(batch, api_key, model_id)
            _apply_verdicts(batch, llm_data)
        except Exception as e:
            # Graceful fallback — keep pairs with placeholder text
            for p in batch:
                p.setdefault("topic_cluster", "Review Manually")
                p.setdefault(
                    "merge_reason",
                    f"LLM error: {str(e)[:80]}. Review manually.",
                )
                p.setdefault("should_merge", True)
        else:
            # The model may omit pairs from its answer; make sure every pair
            # still carries the three output keys.
            for p in batch:
                p.setdefault("topic_cluster", "Review Manually")
                p.setdefault(
                    "merge_reason",
                    "LLM returned no verdict for this pair. Review manually.",
                )
                p.setdefault("should_merge", True)

        results.extend(batch)

        if progress_callback:
            progress_callback(min((b_idx + 1) * batch_size, total), total)

        # Small delay to avoid rate limits
        if b_idx < len(batches) - 1:
            time.sleep(0.5)

    # Filter to only pairs the LLM recommends merging
    return [p for p in results if p.get("should_merge", True)]


def _apply_verdicts(batch: list[dict], llm_data: list) -> None:
    """Copy the LLM's per-pair verdicts onto the matching dicts in ``batch``.

    Items are matched through their 1-based ``pair`` number. An item without
    a ``pair`` key falls back to its position in the response. Items that are
    not dicts, or whose number is unusable or out of range, are skipped.
    """
    for position, item in enumerate(llm_data, start=1):
        if not isinstance(item, dict):
            continue
        try:
            idx = int(item.get("pair", position)) - 1
        except (TypeError, ValueError):
            continue
        if not 0 <= idx < len(batch):
            continue

        target = batch[idx]
        target["topic_cluster"] = item.get("topic_cluster", "General")
        target["merge_reason"] = item.get(
            "merge_reason", "Topically similar content detected."
        )
        target["should_merge"] = _as_bool(item.get("should_merge", True))


def _as_bool(value) -> bool:
    """Coerce an LLM-provided flag to bool, treating "false"-like text as False."""
    if isinstance(value, str):
        return value.strip().lower() not in {"false", "no", "0", ""}
    return bool(value)


def _call_llm(batch: list[dict], api_key: str, model_id: str) -> list[dict]:
    """Ask the model for merge verdicts on one batch of pairs.

    Args:
        batch: Pair dicts providing ``weak_title``, ``weak_clicks``,
            ``strong_title``, ``strong_clicks`` and ``similarity``.
        api_key: OpenRouter API key.
        model_id: Model identifier to query.

    Returns:
        The JSON array parsed from the model's reply.

    Raises:
        RuntimeError: On a non-200 status or a response body without the
            expected ``choices[0].message.content`` text.
        ValueError: If the reply cannot be parsed as a JSON array
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
        requests.RequestException: On transport-level failures.
    """
    pairs_text = "".join(
        f"""
Pair {i}:
  WEAK → Title: "{p['weak_title']}" | Clicks: {p['weak_clicks']}
  STRONG → Title: "{p['strong_title']}" | Clicks: {p['strong_clicks']}
  Similarity: {p['similarity']}
"""
        for i, p in enumerate(batch, start=1)
    )

    prompt = f"""You are an SEO content strategist auditing blog articles for a website.

I have identified blog pairs that may be topically overlapping based on semantic similarity.
The WEAK article has fewer clicks; the STRONG article has more.

For each pair:
1. Decide if the WEAK article should be merged INTO the STRONG article (should_merge: true/false)
2. Provide a topic cluster name (2-4 words)
3. Write a concise 1-2 sentence merge reason

Return ONLY a valid JSON array — no markdown, no extra text:
[
  {{
    "pair": 1,
    "should_merge": true,
    "topic_cluster": "Change Management",
    "merge_reason": "Both articles cover overlapping change management topics. Merge the weaker piece as a supporting section and implement a 301 redirect."
  }}
]

Pairs to analyze:
{pairs_text}"""

    resp = requests.post(
        OPENROUTER_URL,
        headers=_headers(api_key),
        json={
            "model": model_id,
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": 2500,
            "temperature": 0.2,
        },
        timeout=90,
    )

    if resp.status_code != 200:
        raise RuntimeError(f"API {resp.status_code}: {resp.text[:200]}")

    try:
        content = resp.json()["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as err:
        # A 200 response can still lack the expected structure.
        raise RuntimeError(
            f"Unexpected API response: {resp.text[:200]}"
        ) from err
    if not isinstance(content, str):
        raise RuntimeError("LLM returned no text content")

    return _parse_json_array(content)


def _parse_json_array(raw: str) -> list:
    """Parse the model's reply into a list, tolerating fences and stray prose.

    Markdown code fences are stripped first. If the remainder is still not
    valid JSON, the outermost ``[...]`` span is tried before giving up.
    """
    cleaned = raw.replace("```json", "").replace("```", "").strip()
    try:
        parsed = json.loads(cleaned)
    except json.JSONDecodeError:
        start, end = cleaned.find("["), cleaned.rfind("]")
        if start == -1 or end <= start:
            raise
        parsed = json.loads(cleaned[start:end + 1])

    if not isinstance(parsed, list):
        raise ValueError("LLM response is not a JSON array")
    return parsed


def _headers(api_key: str) -> dict:
    """Build the HTTP headers sent with every OpenRouter request."""
    return {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
        "HTTP-Referer": "https://blog-audit.streamlit.app",
        "X-Title": "Blog Audit Tool",
    }