Spaces:

edstellar
/

blog-audit

Sleeping

App Files Files Community

blog-audit / utils /llm_client

vijaykumaredstellar

Update utils/llm_client

1a9abc2 verified 5 months ago

Raw

History Blame Contribute Delete

5.21 kB

	import requests
	import json
	import time

	# Endpoint that this module's requests are POSTed to.
	OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"

	# Display label -> model identifier, grouped by provider.
	# NOTE(review): presumably the values are what callers pass as `model_id`
	# and the labels are shown in a picker -- confirm against the UI code.
	# The dict keeps insertion order, so the entries stay in the order listed.
	MODEL_OPTIONS = {
	    # Anthropic
	    "🟣 Claude 3.5 Haiku (Fast & Cheap)": "anthropic/claude-3-5-haiku",
	    "🟣 Claude 3.5 Sonnet (Most Capable)": "anthropic/claude-3-5-sonnet",
	    # OpenAI
	    "🔵 GPT-4o Mini (OpenAI - Fast)": "openai/gpt-4o-mini",
	    "🔵 GPT-4o (OpenAI - Powerful)": "openai/gpt-4o",
	    # Google
	    "🔴 Gemini Flash 1.5 (Google - Fast)": "google/gemini-flash-1.5",
	    "🔴 Gemini Pro 1.5 (Google - Powerful)": "google/gemini-pro-1.5",
	    # xAI
	    "⚫ Grok 2 (xAI)": "x-ai/grok-2-1212",
	    # DeepSeek
	    "🟡 DeepSeek V3 (Fast & Cheap)": "deepseek/deepseek-chat",
	    "🟡 DeepSeek R1 (Reasoning Model)": "deepseek/deepseek-r1",
	}


	def test_connection(api_key: str, model_id: str) -> tuple[bool, str]:
	    """Ping the API with a minimal request and report whether it worked.

	    Args:
	        api_key: Key used to build the request headers via ``_headers``.
	        model_id: Value sent in the request's ``"model"`` field.

	    Returns:
	        ``(True, "Connection successful.")`` when the API answers HTTP 200.
	        Otherwise ``(False, message)``: ``"API error <status>: <detail>"``
	        for a non-200 answer, or ``"Connection failed: <exception>"`` when
	        sending the request raised.
	    """
	    try:
	        resp = requests.post(
	            OPENROUTER_URL,
	            headers=_headers(api_key),
	            json={
	                "model": model_id,
	                "messages": [{"role": "user", "content": "Reply OK"}],
	                "max_tokens": 5,
	            },
	            timeout=15,
	        )
	    except Exception as e:
	        # Deliberately broad: this is a best-effort probe whose failures are
	        # reported to the caller as a message instead of being raised.
	        return False, f"Connection failed: {str(e)}"

	    if resp.status_code == 200:
	        return True, "Connection successful."

	    # An error body is not guaranteed to be JSON, nor to carry a dict under
	    # "error"; fall back to the raw text so the HTTP status is always shown
	    # as an API error rather than being mislabelled a connection failure.
	    try:
	        payload = resp.json()
	    except ValueError:
	        payload = None
	    error = payload.get("error", {}) if isinstance(payload, dict) else {}
	    if isinstance(error, dict):
	        err = error.get("message", resp.text[:200])
	    else:
	        err = str(error)
	    return False, f"API error {resp.status_code}: {err}"


	def enrich_merge_pairs(
	    pairs: list[dict],
	    api_key: str,
	    model_id: str,
	    batch_size: int = 10,
	    progress_callback=None,
	) -> list[dict]:
	    """Send merge pairs to the LLM in batches and keep the ones worth merging.

	    Every pair dict is updated in place so that it ends up with
	    ``topic_cluster``, ``merge_reason`` and ``should_merge`` keys. Values
	    come from the LLM reply when it covers the pair; otherwise placeholder
	    values are filled in (existing values on the pair are never overwritten
	    by placeholders).

	    Args:
	        pairs: Pair dicts to annotate; passed to ``_call_llm`` batch by batch.
	        api_key: Forwarded to ``_call_llm``.
	        model_id: Forwarded to ``_call_llm``.
	        batch_size: Number of pairs per LLM call; must be at least 1.
	        progress_callback: Optional ``callback(current, total)`` invoked
	            after each batch with the number of pairs processed so far.

	    Returns:
	        The pairs whose ``should_merge`` value is truthy, in input order.

	    Raises:
	        ValueError: If ``batch_size`` is smaller than 1.
	    """
	    # A negative step would yield no batches at all and silently drop every
	    # pair; zero would fail inside range() with an unrelated message.
	    if batch_size < 1:
	        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

	    total = len(pairs)
	    batches = [pairs[i:i + batch_size] for i in range(0, total, batch_size)]
	    results = []

	    for b_idx, batch in enumerate(batches):
	        # Placeholder reason for pairs the reply does not cover; replaced by
	        # the error text below when the whole call fails.
	        fallback_reason = "No LLM verdict returned for this pair. Review manually."
	        try:
	            llm_data = _call_llm(batch, api_key, model_id)
	            for pos, item in enumerate(llm_data, start=1):
	                if not isinstance(item, dict):
	                    continue  # skip malformed entries, keep the usable ones
	                try:
	                    # "pair" is 1-based; without it, assume the reply is in
	                    # the same order as the batch.
	                    idx = int(item.get("pair", pos)) - 1
	                except (TypeError, ValueError):
	                    continue
	                if 0 <= idx < len(batch):
	                    batch[idx]["topic_cluster"] = item.get("topic_cluster", "General")
	                    batch[idx]["merge_reason"] = item.get(
	                        "merge_reason", "Topically similar content detected."
	                    )
	                    batch[idx]["should_merge"] = item.get("should_merge", True)
	        except Exception as e:
	            # Graceful fallback: keep the pairs and flag them for manual review
	            # instead of failing the whole run.
	            fallback_reason = f"LLM error: {str(e)[:80]}. Review manually."

	        # Guarantee the three keys on every pair, whether the call failed or
	        # the reply simply left some pairs out.
	        for p in batch:
	            p.setdefault("topic_cluster", "Review Manually")
	            p.setdefault("merge_reason", fallback_reason)
	            p.setdefault("should_merge", True)

	        results.extend(batch)

	        if progress_callback:
	            progress_callback(min((b_idx + 1) * batch_size, total), total)

	        # Small delay between batches to avoid rate limits (not after the last).
	        if b_idx < len(batches) - 1:
	            time.sleep(0.5)

	    # Filter to only pairs the LLM recommends merging
	    return [p for p in results if p.get("should_merge", True)]


	def _call_llm(batch: list[dict], api_key: str, model_id: str) -> list[dict]:
	pairs_text = ""
	for i, p in enumerate(batch):
	pairs_text += f"""
	Pair {i + 1}:
	WEAK → Title: "{p['weak_title']}" \| Clicks: {p['weak_clicks']}
	STRONG → Title: "{p['strong_title']}" \| Clicks: {p['strong_clicks']}
	Similarity: {p['similarity']}
	"""

	prompt = f"""You are an SEO content strategist auditing blog articles for a website.

	I have identified blog pairs that may be topically overlapping based on semantic similarity.
	The WEAK article has fewer clicks; the STRONG article has more.

	For each pair:
	1. Decide if the WEAK article should be merged INTO the STRONG article (should_merge: true/false)
	2. Provide a topic cluster name (2-4 words)
	3. Write a concise 1-2 sentence merge reason

	Return ONLY a valid JSON array — no markdown, no extra text:
	[
	{{
	"pair": 1,
	"should_merge": true,
	"topic_cluster": "Change Management",
	"merge_reason": "Both articles cover overlapping change management topics. Merge the weaker piece as a supporting section and implement a 301 redirect."
	}}
	]

	Pairs to analyze:
	{pairs_text}"""

	resp = requests.post(
	OPENROUTER_URL,
	headers=_headers(api_key),
	json={
	"model": model_id,
	"messages": [{"role": "user", "content": prompt}],
	"max_tokens": 2500,
	"temperature": 0.2,
	},
	timeout=90,
	)

	if resp.status_code != 200:
	raise Exception(f"API {resp.status_code}: {resp.text[:200]}")

	raw = resp.json()["choices"][0]["message"]["content"].strip()
	raw = raw.replace("```json", "").replace("```", "").strip()
	return json.loads(raw)


	def _headers(api_key: str) -> dict:
	return {
	"Authorization": f"Bearer {api_key}",
	"Content-Type": "application/json",
	"HTTP-Referer": "https://blog-audit.streamlit.app",
	"X-Title": "Blog Audit Tool",
	}