blog-audit / utils /llm_client
vijaykumaredstellar's picture
Update utils/llm_client
1a9abc2 verified
import requests
import json
import time
# Chat-completions endpoint that every request in this module is POSTed to.
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"

# Human-readable label -> model identifier.
# NOTE(review): the values are presumably what callers pass as ``model_id`` to
# the functions below, and the labels presumably feed a model picker in the UI;
# confirm against the caller.
# The leading emoji groups models by vendor. Several labels previously held
# mis-decoded (mojibake) emoji bytes; they are restored to real emoji here.
MODEL_OPTIONS = {
    "🟣 Claude 3.5 Haiku (Fast & Cheap)": "anthropic/claude-3-5-haiku",
    "🟣 Claude 3.5 Sonnet (Most Capable)": "anthropic/claude-3-5-sonnet",
    "🔵 GPT-4o Mini (OpenAI - Fast)": "openai/gpt-4o-mini",
    "🔵 GPT-4o (OpenAI - Powerful)": "openai/gpt-4o",
    "🔴 Gemini Flash 1.5 (Google - Fast)": "google/gemini-flash-1.5",
    "🔴 Gemini Pro 1.5 (Google - Powerful)": "google/gemini-pro-1.5",
    "⚫ Grok 2 (xAI)": "x-ai/grok-2-1212",
    "🟡 DeepSeek V3 (Fast & Cheap)": "deepseek/deepseek-chat",
    "🟡 DeepSeek R1 (Reasoning Model)": "deepseek/deepseek-r1",
}
def test_connection(api_key: str, model_id: str) -> tuple[bool, str]:
    """Ping the API with a minimal request.

    Sends a tiny "Reply OK" chat completion (``max_tokens=5``) to
    ``OPENROUTER_URL`` with a 15-second timeout.

    Args:
        api_key: Key placed in the ``Authorization: Bearer`` header.
        model_id: Value sent as the request's ``model`` field.

    Returns:
        ``(True, "Connection successful.")`` on HTTP 200. Otherwise
        ``(False, message)``, where the message carries the HTTP status and
        the API's error text, or the transport failure. Never raises.
    """
    try:
        resp = requests.post(
            OPENROUTER_URL,
            headers=_headers(api_key),
            json={
                "model": model_id,
                "messages": [{"role": "user", "content": "Reply OK"}],
                "max_tokens": 5,
            },
            timeout=15,
        )
        if resp.status_code == 200:
            return True, "Connection successful."

        # Error bodies are not guaranteed to be JSON (e.g. an HTML gateway
        # page), and "error" is not guaranteed to be an object. Parse
        # defensively so the HTTP status is always reported instead of being
        # masked as a generic connection failure.
        fallback = resp.text[:200]
        try:
            payload = resp.json()
        except ValueError:
            payload = None
        error = payload.get("error") if isinstance(payload, dict) else None
        if isinstance(error, dict):
            detail = error.get("message", fallback)
        elif error:
            detail = str(error)
        else:
            detail = fallback
        return False, f"API error {resp.status_code}: {detail}"
    except Exception as e:
        # Deliberately broad: this function reports a status to its caller
        # and must not raise, whatever goes wrong during the request.
        return False, f"Connection failed: {str(e)}"
def enrich_merge_pairs(
    pairs: list[dict],
    api_key: str,
    model_id: str,
    batch_size: int = 10,
    progress_callback=None,
) -> list[dict]:
    """Send merge pairs to the LLM in batches and annotate each pair.

    Every dict in ``pairs`` is updated IN PLACE with ``topic_cluster``,
    ``merge_reason`` and ``should_merge``. Pairs for which the LLM gives no
    usable verdict (request failure, malformed reply, or the pair simply
    missing from the reply) receive "Review Manually" placeholders, without
    overwriting values the pair already carries.

    Args:
        pairs: Pair dicts; passed through to ``_call_llm`` batch by batch.
        api_key: API key forwarded to ``_call_llm``.
        model_id: Model identifier forwarded to ``_call_llm``.
        batch_size: Number of pairs per LLM request; must be >= 1.
        progress_callback: Optional ``callback(current, total)`` invoked after
            each batch with the count of pairs processed so far.

    Returns:
        The pairs whose ``should_merge`` is truthy, in their original order.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1. (A negative value
            would otherwise produce zero batches and silently drop every pair.)
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    total = len(pairs)
    batches = [pairs[i: i + batch_size] for i in range(0, total, batch_size)]
    results = []
    for b_idx, batch in enumerate(batches):
        # Reason used for pairs the LLM leaves out of an otherwise valid reply.
        placeholder_reason = "LLM returned no verdict for this pair. Review manually."
        try:
            llm_data = _call_llm(batch, api_key, model_id)
            if not isinstance(llm_data, list):
                raise ValueError("LLM response is not a JSON array")
            for position, item in enumerate(llm_data, start=1):
                if not isinstance(item, dict):
                    continue  # ignore stray non-object entries
                try:
                    # "pair" is 1-based; fall back to the item's position so
                    # entries lacking it do not all collapse onto pair 1.
                    idx = int(item.get("pair", position)) - 1
                except (TypeError, ValueError):
                    continue
                if 0 <= idx < len(batch):
                    batch[idx]["topic_cluster"] = item.get("topic_cluster", "General")
                    batch[idx]["merge_reason"] = item.get("merge_reason", "Topically similar content detected.")
                    batch[idx]["should_merge"] = item.get("should_merge", True)
        except Exception as e:
            # Graceful fallback — keep pairs with placeholder text
            placeholder_reason = f"LLM error: {str(e)[:80]}. Review manually."

        # Guarantee the three keys on every pair. setdefault leaves verdicts
        # written above (and any pre-existing values) untouched.
        for p in batch:
            p.setdefault("topic_cluster", "Review Manually")
            p.setdefault("merge_reason", placeholder_reason)
            p.setdefault("should_merge", True)

        results.extend(batch)
        if progress_callback:
            progress_callback(min((b_idx + 1) * batch_size, total), total)
        # Small delay to avoid rate limits
        if b_idx < len(batches) - 1:
            time.sleep(0.5)

    # Filter to only pairs the LLM recommends merging
    return [p for p in results if p.get("should_merge", True)]
def _build_merge_prompt(batch: list[dict]) -> str:
    """Render the audit prompt listing every pair in ``batch`` (1-based)."""
    pairs_text = "".join(
        f"""
Pair {i}:
WEAK → Title: "{p['weak_title']}" | Clicks: {p['weak_clicks']}
STRONG → Title: "{p['strong_title']}" | Clicks: {p['strong_clicks']}
Similarity: {p['similarity']}
"""
        for i, p in enumerate(batch, start=1)
    )
    return f"""You are an SEO content strategist auditing blog articles for a website.
I have identified blog pairs that may be topically overlapping based on semantic similarity.
The WEAK article has fewer clicks; the STRONG article has more.
For each pair:
1. Decide if the WEAK article should be merged INTO the STRONG article (should_merge: true/false)
2. Provide a topic cluster name (2-4 words)
3. Write a concise 1-2 sentence merge reason
Return ONLY a valid JSON array — no markdown, no extra text:
[
{{
"pair": 1,
"should_merge": true,
"topic_cluster": "Change Management",
"merge_reason": "Both articles cover overlapping change management topics. Merge the weaker piece as a supporting section and implement a 301 redirect."
}}
]
Pairs to analyze:
{pairs_text}"""


def _parse_json_array(raw: str) -> list:
    """Parse the model's reply into a list, tolerating fences and stray prose."""
    text = raw.replace("```json", "").replace("```", "").strip()
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        # Models sometimes wrap the array in explanatory or reasoning text
        # despite the instructions; retry on the outermost [...] span only.
        start, end = text.find("["), text.rfind("]")
        if start == -1 or end <= start:
            raise
        parsed = json.loads(text[start: end + 1])
    if not isinstance(parsed, list):
        raise ValueError("LLM response is not a JSON array")
    return parsed


def _call_llm(batch: list[dict], api_key: str, model_id: str) -> list[dict]:
    """Ask the model for a merge verdict on each pair in ``batch``.

    Args:
        batch: Pair dicts. Each must provide ``weak_title``, ``weak_clicks``,
            ``strong_title``, ``strong_clicks`` and ``similarity``.
        api_key: Key placed in the ``Authorization: Bearer`` header.
        model_id: Value sent as the request's ``model`` field.

    Returns:
        The parsed JSON array from the model's reply. The prompt requests
        objects with ``pair``, ``should_merge``, ``topic_cluster`` and
        ``merge_reason``; the element shape is not validated here.

    Raises:
        KeyError: If a pair lacks one of the required keys.
        RuntimeError: On a non-200 status, or a 200 response that carries no
            usable completion text.
        ValueError: If the reply cannot be parsed as a JSON array
            (``json.JSONDecodeError`` is a ``ValueError``).
        requests.RequestException: On transport failures or timeout.
    """
    resp = requests.post(
        OPENROUTER_URL,
        headers=_headers(api_key),
        json={
            "model": model_id,
            "messages": [{"role": "user", "content": _build_merge_prompt(batch)}],
            "max_tokens": 2500,
            "temperature": 0.2,
        },
        timeout=90,
    )
    if resp.status_code != 200:
        raise RuntimeError(f"API {resp.status_code}: {resp.text[:200]}")

    # A 200 response may still lack a completion; report that with the body
    # text rather than letting a bare KeyError/AttributeError escape.
    try:
        content = resp.json()["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as err:
        raise RuntimeError(f"Unexpected API response: {resp.text[:200]}") from err
    if not isinstance(content, str) or not content.strip():
        raise RuntimeError("API returned an empty completion")

    return _parse_json_array(content)
def _headers(api_key: str) -> dict:
    """Build the HTTP headers sent with every request in this module.

    Args:
        api_key: Key embedded in the ``Authorization`` header as a bearer token.

    Returns:
        A new dict with the authorization, content-type and app-identification
        headers.
    """
    request_headers = {}
    request_headers["Authorization"] = "Bearer " + f"{api_key}"
    request_headers["Content-Type"] = "application/json"
    # NOTE(review): these two presumably identify the calling app to
    # OpenRouter — confirm against the OpenRouter API docs.
    request_headers["HTTP-Referer"] = "https://blog-audit.streamlit.app"
    request_headers["X-Title"] = "Blog Audit Tool"
    return request_headers