Spaces:
Sleeping
Sleeping
| import requests | |
| import json | |
| import time | |
# OpenRouter chat-completions endpoint used by every request in this module.
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"

# UI display label -> OpenRouter model id. The colored-circle emoji group
# models by provider (purple = Anthropic, blue = OpenAI, red = Google,
# black = xAI, yellow = DeepSeek).
# NOTE: the previous revision had these emoji mojibake'd (UTF-8 bytes misread
# through a Greek codepage, e.g. "π£" for U+1F7E3); restored to the intended
# characters.
MODEL_OPTIONS = {
    "🟣 Claude 3.5 Haiku (Fast & Cheap)": "anthropic/claude-3-5-haiku",
    "🟣 Claude 3.5 Sonnet (Most Capable)": "anthropic/claude-3-5-sonnet",
    "🔵 GPT-4o Mini (OpenAI - Fast)": "openai/gpt-4o-mini",
    "🔵 GPT-4o (OpenAI - Powerful)": "openai/gpt-4o",
    "🔴 Gemini Flash 1.5 (Google - Fast)": "google/gemini-flash-1.5",
    "🔴 Gemini Pro 1.5 (Google - Powerful)": "google/gemini-pro-1.5",
    "⚫ Grok 2 (xAI)": "x-ai/grok-2-1212",
    "🟡 DeepSeek V3 (Fast & Cheap)": "deepseek/deepseek-chat",
    "🟡 DeepSeek R1 (Reasoning Model)": "deepseek/deepseek-r1",
}
def test_connection(api_key: str, model_id: str) -> tuple[bool, str]:
    """Ping the API with a minimal one-token request.

    Returns (success, message): (True, ...) on HTTP 200, otherwise
    (False, ...) with the API's error message or the transport failure.
    """
    payload = {
        "model": model_id,
        "messages": [{"role": "user", "content": "Reply OK"}],
        "max_tokens": 5,
    }
    try:
        response = requests.post(
            OPENROUTER_URL,
            headers=_headers(api_key),
            json=payload,
            timeout=15,
        )
        if response.status_code == 200:
            return True, "Connection successful."
        # Prefer the structured API error; fall back to raw body (truncated).
        # Kept inside the try so a non-JSON error body is also handled.
        detail = response.json().get("error", {}).get("message", response.text[:200])
        return False, f"API error {response.status_code}: {detail}"
    except Exception as exc:
        return False, f"Connection failed: {str(exc)}"
def enrich_merge_pairs(
    pairs: list[dict],
    api_key: str,
    model_id: str,
    batch_size: int = 10,
    progress_callback=None,
) -> list[dict]:
    """
    Send merge pairs to the LLM in batches.
    Fills in topic_cluster and merge_reason for each pair (mutated in place).
    progress_callback(current, total) is called after each batch.
    Only pairs the LLM recommends merging (should_merge truthy) are returned.
    """
    total = len(pairs)
    enriched: list[dict] = []
    for start in range(0, total, batch_size):
        chunk = pairs[start:start + batch_size]
        try:
            for verdict in _call_llm(chunk, api_key, model_id):
                # The model numbers pairs from 1; map back to a chunk index.
                pos = verdict.get("pair", 1) - 1
                if not (0 <= pos < len(chunk)):
                    continue
                target = chunk[pos]
                target["topic_cluster"] = verdict.get("topic_cluster", "General")
                target["merge_reason"] = verdict.get("merge_reason", "Topically similar content detected.")
                target["should_merge"] = verdict.get("should_merge", True)
        except Exception as exc:
            # Graceful fallback: keep the pairs, with placeholder text for
            # anything the LLM did not fill in before the failure.
            for pair in chunk:
                pair.setdefault("topic_cluster", "Review Manually")
                pair.setdefault("merge_reason", f"LLM error: {str(exc)[:80]}. Review manually.")
                pair.setdefault("should_merge", True)
        enriched.extend(chunk)
        if progress_callback:
            progress_callback(min(start + batch_size, total), total)
        # Small pause between batches (not after the last) to dodge rate limits.
        if start + batch_size < total:
            time.sleep(0.5)
    # Filter to only pairs the LLM recommends merging.
    return [p for p in enriched if p.get("should_merge", True)]
def _call_llm(batch: list[dict], api_key: str, model_id: str) -> list[dict]:
    """Send one batch of merge pairs to the model and parse its JSON reply.

    Each dict in *batch* must provide weak_title, weak_clicks, strong_title,
    strong_clicks and similarity. Returns the model's answer parsed as a list
    of dicts (expected keys per item: "pair", "should_merge", "topic_cluster",
    "merge_reason"). Raises Exception on a non-200 HTTP status; json.loads
    raises ValueError if the reply is not valid JSON.
    """
    # NOTE(review): the stray "β" characters in the string literals below look
    # like mojibake of an em dash/arrow from the original file — confirm and
    # fix at the source; they are kept verbatim here since they are runtime
    # prompt text.
    pairs_text = ""
    for i, p in enumerate(batch):
        # Pairs are numbered from 1 in the prompt; the caller maps the model's
        # "pair" field back to a list index with pair - 1.
        pairs_text += f"""
Pair {i + 1}:
WEAK β Title: "{p['weak_title']}" | Clicks: {p['weak_clicks']}
STRONG β Title: "{p['strong_title']}" | Clicks: {p['strong_clicks']}
Similarity: {p['similarity']}
"""
    # Doubled braces {{ }} render literal JSON braces inside the f-string.
    prompt = f"""You are an SEO content strategist auditing blog articles for a website.
I have identified blog pairs that may be topically overlapping based on semantic similarity.
The WEAK article has fewer clicks; the STRONG article has more.
For each pair:
1. Decide if the WEAK article should be merged INTO the STRONG article (should_merge: true/false)
2. Provide a topic cluster name (2-4 words)
3. Write a concise 1-2 sentence merge reason
Return ONLY a valid JSON array β no markdown, no extra text:
[
{{
"pair": 1,
"should_merge": true,
"topic_cluster": "Change Management",
"merge_reason": "Both articles cover overlapping change management topics. Merge the weaker piece as a supporting section and implement a 301 redirect."
}}
]
Pairs to analyze:
{pairs_text}"""
    resp = requests.post(
        OPENROUTER_URL,
        headers=_headers(api_key),
        json={
            "model": model_id,
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": 2500,
            # Low temperature to keep the JSON output consistent.
            "temperature": 0.2,
        },
        timeout=90,
    )
    if resp.status_code != 200:
        raise Exception(f"API {resp.status_code}: {resp.text[:200]}")
    raw = resp.json()["choices"][0]["message"]["content"].strip()
    # Strip markdown code fences in case the model wrapped the JSON anyway.
    raw = raw.replace("```json", "").replace("```", "").strip()
    return json.loads(raw)
| def _headers(api_key: str) -> dict: | |
| return { | |
| "Authorization": f"Bearer {api_key}", | |
| "Content-Type": "application/json", | |
| "HTTP-Referer": "https://blog-audit.streamlit.app", | |
| "X-Title": "Blog Audit Tool", | |
| } | |