Spaces:

DevanshuDon
/

feed-ranking

Sleeping

App Files Files Community

feed-ranking / server /app.py

DevanshuDon

Update server/app.py

01c7fa8 verified about 2 months ago

raw

history blame contribute delete

22.6 kB


	# app.py — Feed Ranking OpenEnv Environment

	import json
	import os
	import random
	import statistics
	import sys
	import zlib
	from pathlib import Path
	from typing import List, Optional

	# Ensure server/ directory is on the path so 'from data import ...' works
	# whether run as 'server.app' from root or 'app' from server/
	sys.path.insert(0, str(Path(__file__).parent))

	from dotenv import load_dotenv
	from fastapi import FastAPI, HTTPException
	from openai import OpenAI
	from pydantic import BaseModel

	from data import (
	    POSTS, USERS, TASKS, TOPICS,
	    compute_relevance, compute_ndcg, check_policies,
	    select_candidates, compute_ideal_ranking,
	    select_candidates_enhanced, simulate_engagement
	)

	load_dotenv()

	# ============================================================
	# PYDANTIC MODELS — typed inputs
	# ============================================================

	class RankingAction(BaseModel):
	    """Request body for /step: candidate post IDs ordered best-first."""

	    # Order is significant: index 0 is the post the agent ranks highest.
	    ranked_post_ids: List[str]


	# ============================================================
	# THE ENVIRONMENT CLASS
	# ============================================================

	class FeedRankingEnv:
	    """Stateful feed-ranking environment driven through reset/step/state.

	    An instance tracks one episode at a time: the active task, the user being
	    ranked for, and that user's candidate posts. The "hard" task scores one
	    ranking per user listed in the task definition and finishes only after
	    the last one; every other task finishes after a single scored ranking.
	    """

	    # Post fields copied into each observation, in output order.
	    _POST_FIELDS = (
	        "id", "title", "topic", "content_type", "author_id",
	        "author_popularity", "is_new_creator", "quality_score",
	        "is_clickbait", "age_hours",
	    )

	    def __init__(self):
	        """Create an idle environment; reset() must be called before step()."""
	        self.current_task = None
	        self.current_user = None
	        self.candidate_posts = []
	        self.episode_done = False
	        self.total_score = 0.0
	        self.steps_taken = 0
	        # Progress through the "hard" task's user list and per-user results.
	        self.hard_user_index = 0
	        self.hard_scores = []
	        self.hard_policy_scores = []
	        # Base seed; a per-task / per-user offset is added when sampling.
	        self.seed = 42

	    @staticmethod
	    def _seed_offset(key: str) -> int:
	        """Return an offset in [0, 1000) derived from ``key``.

	        The built-in hash() of a str is salted per interpreter process, so
	        it cannot produce seeds that reproduce across restarts. CRC32 of the
	        UTF-8 bytes is stable everywhere.
	        """
	        return zlib.crc32(key.encode("utf-8")) % 1000

	    def _sample_candidates(self, task_def: dict, seed_key: str) -> list:
	        """Draw the candidate pool for ``task_def`` using a stable seed."""
	        return select_candidates_enhanced(
	            task_def["num_candidates"],
	            seed=self.seed + self._seed_offset(seed_key),
	        )

	    def reset(self, task: str = "easy"):
	        """Start a new episode.

	        Args:
	            task: Key into TASKS selecting the episode configuration.

	        Returns:
	            A dict with the initial ``observation``, a zero ``reward``,
	            ``done`` set to False and an ``info`` summary of the task.

	        Raises:
	            ValueError: If ``task`` is not a key of TASKS.
	        """
	        if task not in TASKS:
	            raise ValueError(f"Unknown task: {task}. Choose from: easy, medium, hard")

	        self.current_task = task
	        self.episode_done = False
	        self.total_score = 0.0
	        self.steps_taken = 0

	        task_def = TASKS[task]
	        user_id = task_def["user_ids"][0]

	        if task == "hard":
	            self.hard_user_index = 0
	            self.hard_scores = []
	            self.hard_policy_scores = []
	            # Hard pools are keyed by user, matching how step() seeds the
	            # following users and how /grader and /baseline seed all of them.
	            seed_key = user_id
	        else:
	            seed_key = task

	        self.current_user = USERS[user_id]
	        self.candidate_posts = self._sample_candidates(task_def, seed_key)

	        return {
	            "observation": self._build_observation(),
	            "reward": 0.0,
	            "done": False,
	            "info": {
	                "task": task,
	                "num_candidates": len(self.candidate_posts),
	                "top_k": task_def["top_k"],
	                "steps_taken": 0,
	            },
	        }

	    def step(self, action: dict):
	        """Score a submitted ranking and advance the episode.

	        Args:
	            action: Mapping with a ``ranked_post_ids`` list (best first).

	        Returns:
	            A dict with ``observation``, ``reward``, ``done`` and ``info``.
	            Malformed rankings (unknown IDs, empty list, duplicates) return
	            a negative reward without ending the episode or counting a step.
	        """
	        if self.current_task is None:
	            # No reset() yet: report it instead of failing on TASKS[None].
	            return {
	                "observation": "No active episode. Call /reset to start one.",
	                "reward": 0.0,
	                "done": True,
	                "info": {"error": "no_active_episode"},
	            }

	        if self.episode_done:
	            return {
	                "observation": "Episode is done. Call /reset to start again.",
	                "reward": 0.0,
	                "done": True,
	                "info": {"total_score": self.total_score},
	            }

	        ranked_ids = action.get("ranked_post_ids") or []

	        # Validate IDs: penalize invalid/destructive actions.
	        valid_ids = {p["id"] for p in self.candidate_posts}
	        invalid = [pid for pid in ranked_ids if pid not in valid_ids]
	        if invalid:
	            return {
	                "observation": f"Invalid post IDs not in candidate pool: {invalid}. Penalty applied.",
	                "reward": -0.2,
	                "done": False,
	                "info": {"error": "invalid_post_ids", "invalid": invalid, "penalty": -0.2},
	            }

	        # Penalize empty rankings (short but non-empty rankings are scored as-is).
	        if not ranked_ids:
	            return {
	                "observation": "Empty ranking submitted. Penalty applied.",
	                "reward": -0.3,
	                "done": False,
	                "info": {"error": "empty_ranking", "penalty": -0.3},
	            }

	        # Penalize duplicate post IDs (lazy/destructive behavior). Duplicates
	        # are collected in one pass, in first-repeat order, so the response
	        # is deterministic.
	        seen = set()
	        dupes = []
	        for pid in ranked_ids:
	            if pid in seen and pid not in dupes:
	                dupes.append(pid)
	            seen.add(pid)
	        if dupes:
	            shown = "{" + ", ".join(repr(pid) for pid in dupes) + "}"
	            return {
	                "observation": f"Duplicate post IDs detected: {shown}. Penalty applied.",
	                "reward": -0.1,
	                "done": False,
	                "info": {"error": "duplicate_ids", "duplicates": dupes, "penalty": -0.1},
	            }

	        task_def = TASKS[self.current_task]
	        ndcg = compute_ndcg(
	            self.current_user, ranked_ids, self.candidate_posts, k=task_def["top_k"]
	        )

	        if self.current_task == "hard":
	            return self._score_hard_step(ranked_ids, ndcg, task_def)
	        return self._score_single_step(ndcg)

	    def _score_hard_step(self, ranked_ids: list, ndcg, task_def: dict) -> dict:
	        """Record one hard-task user and either move on or close the episode."""
	        policy_result = check_policies(ranked_ids, self.candidate_posts)
	        policy_score = policy_result["policy_score"]

	        self.hard_scores.append(ndcg)
	        self.hard_policy_scores.append(policy_score)
	        self.hard_user_index += 1

	        user_ids = task_def["user_ids"]

	        if self.hard_user_index < len(user_ids):
	            step_score = round(0.6 * ndcg + 0.4 * policy_score, 4)
	            next_user_id = user_ids[self.hard_user_index]
	            self.current_user = USERS[next_user_id]
	            self.candidate_posts = self._sample_candidates(task_def, next_user_id)
	            self.steps_taken += 1

	            return {
	                "observation": self._build_observation(),
	                "reward": step_score,
	                "done": False,
	                "info": {
	                    "step_ndcg": ndcg,
	                    "step_policy_score": policy_score,
	                    "policy_details": policy_result,
	                    "step_score": step_score,
	                    "users_remaining": len(user_ids) - self.hard_user_index,
	                },
	            }

	        # Last user: the reward is the blended average over all users.
	        avg_ndcg = round(statistics.mean(self.hard_scores), 4)
	        avg_policy = round(statistics.mean(self.hard_policy_scores), 4)
	        final_score = round(0.6 * avg_ndcg + 0.4 * avg_policy, 4)

	        self.total_score = final_score
	        self.episode_done = True
	        self.steps_taken += 1

	        return {
	            "observation": (
	                f"All {len(user_ids)} users scored. Avg NDCG={avg_ndcg}, "
	                f"Avg Policy={avg_policy}, Final={final_score}"
	            ),
	            "reward": final_score,
	            "done": True,
	            "info": {
	                "final_score": final_score,
	                "avg_ndcg": avg_ndcg,
	                "avg_policy_score": avg_policy,
	                "per_user_ndcg": self.hard_scores,
	                "per_user_policy": self.hard_policy_scores,
	            },
	        }

	    def _score_single_step(self, ndcg) -> dict:
	        """Close a single-step episode with NDCG as the reward."""
	        self.total_score = ndcg
	        self.episode_done = True
	        self.steps_taken += 1

	        if ndcg >= 0.9:
	            feedback = f"Excellent ranking! NDCG={ndcg}"
	        elif ndcg >= 0.7:
	            feedback = f"Good ranking. NDCG={ndcg}"
	        elif ndcg >= 0.5:
	            feedback = f"Decent ranking. NDCG={ndcg}"
	        else:
	            feedback = f"Poor ranking. NDCG={ndcg}"

	        return {
	            "observation": feedback,
	            "reward": ndcg,
	            "done": True,
	            "info": {"score": ndcg, "task": self.current_task},
	        }

	    def _build_observation(self) -> dict:
	        """Build what the agent sees for the current user and candidates."""
	        user = self.current_user
	        task_def = TASKS[self.current_task]

	        user_info = {"id": user["id"], "archetype": user["archetype"]}
	        # Optional profile sections are included only when non-empty.
	        for field in ("stated_interests", "stated_dislikes", "engagement_history"):
	            value = user.get(field)
	            if value:
	                user_info[field] = value

	        candidates = [
	            {field: post[field] for field in self._POST_FIELDS}
	            for post in self.candidate_posts
	        ]

	        obs = {
	            "task": self.current_task,
	            "description": task_def["description"],
	            "user": user_info,
	            "candidate_posts": candidates,
	            "action_required": f"Return top {task_def['top_k']} post IDs ranked best-first",
	        }

	        if task_def.get("policies_enforced"):
	            obs["policies"] = {
	                "min_topics_in_top_10": 3,
	                "max_same_author_in_top_5": 2,
	                "min_new_creator_in_top_10": 1,
	                "max_clickbait_in_top_5": 1,
	            }
	            obs["scoring"] = "0.6 * NDCG + 0.4 * policy_compliance"

	        return obs

	    def state(self):
	        """Return a snapshot of the episode bookkeeping fields."""
	        return {
	            "current_task": self.current_task,
	            "current_user_id": self.current_user["id"] if self.current_user else None,
	            "num_candidates": len(self.candidate_posts),
	            "episode_done": self.episode_done,
	            "steps_taken": self.steps_taken,
	            "total_score": self.total_score,
	        }


	# ============================================================
	# FASTAPI SERVER
	# ============================================================

	# ASGI application object; the route decorators below register onto it.
	app = FastAPI(
	    version="1.0.0",
	    title="Feed Ranking Environment",
	    description=(
	        "An OpenEnv environment where AI agents learn to rank social media feeds. "
	        "Agents must balance user engagement (NDCG), content quality, and platform policies."
	    ),
	)

	# Single module-level environment instance shared by every request handler.
	env = FeedRankingEnv()


	@app.post("/reset")
	def reset(task: str = "easy"):
	    """Start a new episode for ``task`` and return the initial observation.

	    Unknown task names are rejected with HTTP 400 (the same response /grader
	    gives) rather than letting the environment's ValueError surface as an
	    unhandled 500.
	    """
	    if task not in TASKS:
	        raise HTTPException(status_code=400, detail=f"Unknown task: {task}")
	    return env.reset(task)


	@app.post("/step")
	def step(action: RankingAction):
	    """Submit a ranking for the active episode and return the scored result.

	    Responds with HTTP 400 when no episode has been started, instead of
	    failing inside the environment on a missing task.
	    """
	    if env.current_task is None:
	        raise HTTPException(
	            status_code=400,
	            detail="No active episode. Call /reset first.",
	        )
	    # Build the payload from the validated field directly; this is the same
	    # mapping the model's dict export produces for its single field.
	    return env.step({"ranked_post_ids": action.ranked_post_ids})


	@app.get("/state")
	def state():
	    """Expose the shared environment's current bookkeeping snapshot."""
	    snapshot = env.state()
	    return snapshot


	@app.get("/tasks")
	def tasks():
	    """List every task in TASKS with its public configuration.

	    ``policies_enforced`` is read with a default because the rest of this
	    module treats it as an optional key; a task that omits it is reported
	    as not enforcing policies rather than causing a KeyError.
	    """
	    catalog = {}
	    for task_name, td in TASKS.items():
	        catalog[task_name] = {
	            "description": td["description"],
	            "action_schema": td["action_schema"],
	            "num_candidates": td["num_candidates"],
	            "top_k": td["top_k"],
	            "num_users": len(td["user_ids"]),
	            "policies_enforced": td.get("policies_enforced", False),
	        }
	    return catalog


	@app.get("/grader")
	def grader(task: str = "easy"):
	    """Score a naive baseline (a fixed-seed random ranking) on ``task``.

	    For each user in the task, the candidate pool is shuffled with a fixed
	    seed, truncated to ``top_k`` and scored with NDCG; the hard task blends
	    in the policy score as 0.6 * NDCG + 0.4 * policy.

	    Raises:
	        HTTPException: 400 when ``task`` is not a key of TASKS.
	    """
	    if task not in TASKS:
	        raise HTTPException(status_code=400, detail=f"Unknown task: {task}")

	    task_def = TASKS[task]
	    top_k = task_def["top_k"]
	    scores = []

	    for user_id in task_def["user_ids"]:
	        user = USERS[user_id]
	        # CRC32 rather than hash(): str hashes are salted per process, which
	        # made the "seeded" pool differ on every server restart.
	        seed_key = user_id if task == "hard" else task
	        # NOTE(review): FeedRankingEnv draws pools with
	        # select_candidates_enhanced, this endpoint with select_candidates;
	        # confirm whether the two pools are meant to match.
	        candidates = select_candidates(
	            task_def["num_candidates"],
	            seed=42 + zlib.crc32(seed_key.encode("utf-8")) % 1000,
	        )

	        # A fresh generator per user keeps every shuffle independent of how
	        # many users came before.
	        rng = random.Random(123)
	        naive_ranking = [p["id"] for p in candidates]
	        rng.shuffle(naive_ranking)
	        naive_ranking = naive_ranking[:top_k]

	        ndcg = compute_ndcg(user, naive_ranking, candidates, k=top_k)

	        if task == "hard":
	            policy = check_policies(naive_ranking, candidates)
	            score = round(0.6 * ndcg + 0.4 * policy["policy_score"], 4)
	        else:
	            score = ndcg

	        scores.append(score)

	    # statistics.mean raises on an empty list; a task without users scores 0.
	    average = round(statistics.mean(scores), 4) if scores else 0.0

	    return {
	        "task": task,
	        "num_users": len(task_def["user_ids"]),
	        "scores": scores,
	        "average_score": average,
	        "note": "Naive baseline — random ranking of candidate posts",
	    }


	# ============================================================
	# /baseline — AI model against all 3 tasks
	# ============================================================

	@app.get("/baseline")
	def baseline():
	    """Run the configured chat model against the easy, medium and hard tasks.

	    Configuration comes from the environment: API_BASE_URL, MODEL_NAME, and
	    HF_TOKEN or OPENAI_API_KEY. Each user's ranking is requested from the
	    model, parsed, and scored the same way /grader scores its baseline.

	    Raises:
	        HTTPException: 500 when neither HF_TOKEN nor OPENAI_API_KEY is set.
	    """
	    api_base_url = os.getenv("API_BASE_URL", "https://api.openai.com/v1")
	    model_name = os.getenv("MODEL_NAME", "gpt-4o-mini")
	    api_key = os.getenv("HF_TOKEN") or os.getenv("OPENAI_API_KEY")

	    if not api_key:
	        raise HTTPException(status_code=500, detail="No API key found.")

	    client = OpenAI(base_url=api_base_url, api_key=api_key)
	    task_names = ["easy", "medium", "hard"]
	    all_results = {}

	    for task in task_names:
	        task_def = TASKS[task]
	        top_k = task_def["top_k"]
	        scores = []
	        details = []

	        for user_id in task_def["user_ids"]:
	            user = USERS[user_id]
	            # CRC32 rather than hash(): str hashes are salted per process,
	            # which made the "seeded" pool differ on every server restart.
	            seed_key = user_id if task == "hard" else task
	            candidates = select_candidates(
	                task_def["num_candidates"],
	                seed=42 + zlib.crc32(seed_key.encode("utf-8")) % 1000,
	            )

	            prompt = _build_ranking_prompt(user, candidates, task_def)
	            ai_response = _call_model(client, model_name, prompt)
	            parsed = _parse_ranking_response(ai_response, candidates, top_k)

	            ndcg = compute_ndcg(user, parsed, candidates, k=top_k)

	            if task == "hard":
	                policy = check_policies(parsed, candidates)
	                score = round(0.6 * ndcg + 0.4 * policy["policy_score"], 4)
	            else:
	                score = ndcg

	            scores.append(score)
	            details.append({
	                "user_id": user_id,
	                "parsed_ranking": parsed,
	                "ndcg": ndcg,
	                "score": score,
	                # _call_model reports failures as "error: ..." text, after
	                # which the parser falls back to a random ranking; flag it
	                # so such scores are not mistaken for model output.
	                "model_error": ai_response.startswith("error:"),
	            })

	        # statistics.mean raises on an empty list; a task without users scores 0.
	        average = round(statistics.mean(scores), 4) if scores else 0.0
	        all_results[task] = {
	            "average_score": average,
	            "scores": scores,
	            "details": details,
	        }

	    return {
	        "model": model_name,
	        "results": all_results,
	        "summary": {
	            task: all_results[task]["average_score"] for task in task_names
	        },
	    }


	# ============================================================
	# HELPER FUNCTIONS
	# ============================================================

	def _build_ranking_prompt(user: dict, candidates: list, task_def: dict) -> str:
	"""Build prompt for AI model."""

	user_info = f"User ID: {user['id']}\nArchetype: {user['archetype']}\n"
	if user.get("stated_interests"):
	user_info += f"Stated interests: {', '.join(user['stated_interests'])}\n"
	if user.get("stated_dislikes"):
	user_info += f"Stated dislikes: {', '.join(user['stated_dislikes'])}\n"
	if user.get("engagement_history"):
	history_str = "\n".join(
	f" - {h['action']} post about {h['topic']} (post {h['post_id']})"
	for h in user["engagement_history"]
	)
	user_info += f"Engagement history:\n{history_str}\n"

	posts_str = "\n".join(
	f" {p['id']}: \"{p['title']}\" \| topic={p['topic']} \| type={p['content_type']} \| "
	f"quality={p['quality_score']} \| clickbait={p['is_clickbait']} \| "
	f"new_creator={p['is_new_creator']} \| age={p['age_hours']}h \| "
	f"author={p['author_id']} (pop={p['author_popularity']})"
	for p in candidates
	)

	policy_str = ""
	if task_def.get("policies_enforced"):
	policy_str = (
	"\n\nPLATFORM POLICIES (must satisfy):\n"
	"- At least 3 different topics in your top 10\n"
	"- Max 2 posts from same author in top 5\n"
	"- At least 1 new creator post in top 10\n"
	"- Max 1 clickbait post in top 5\n"
	)

	top_k = task_def["top_k"]

	return f"""You are a feed ranking algorithm. Rank the top {top_k} posts for this user.

	USER PROFILE:
	{user_info}

	CANDIDATE POSTS:
	{posts_str}
	{policy_str}

	Respond with ONLY a JSON object: {{"ranked_post_ids": ["p01", "p39", ...]}}
	Exactly {top_k} post IDs, ordered best-first. No explanation."""


	def _call_model(client: OpenAI, model_name: str, prompt: str) -> str:
	    """Send ``prompt`` as one user message and return the reply text.

	    Best-effort by design: any failure is printed and reported back as an
	    ``"error: ..."`` string instead of being raised, and a reply with no
	    content is reported as ``"error: empty response"``.
	    """
	    request = {
	        "model": model_name,
	        "messages": [{"role": "user", "content": prompt}],
	        "max_tokens": 200,
	        "temperature": 0.1,
	    }
	    try:
	        completion = client.chat.completions.create(**request)
	        reply = completion.choices[0].message.content
	        if not reply:
	            return "error: empty response"
	        return reply.strip()
	    except Exception as exc:
	        print(f"API error: {str(exc)}")
	        return f"error: {str(exc)}"


	def _parse_ranking_response(response: str, candidates: list, top_k: int) -> list:
	"""Parse AI response into list of post IDs."""

	valid_ids = {p["id"] for p in candidates}

	try:
	start = response.find("{")
	end = response.rfind("}") + 1
	if start != -1 and end > start:
	parsed = json.loads(response[start:end])
	ids = parsed.get("ranked_post_ids", [])
	ids = [pid for pid in ids if pid in valid_ids]
	if ids:
	return ids[:top_k]
	except:
	pass

	rng = random.Random(999)
	fallback = [p["id"] for p in candidates]
	rng.shuffle(fallback)
	return fallback[:top_k]


	# ============================================================
	# 3 ENDPOINTS (required by OpenEnv validator): /metadata, /schema, /mcp
	# ============================================================


	# --- 1. /metadata endpoint ---
	@app.get("/metadata")
	def metadata():
	    """Describe this environment: name, summary, version, author, task names."""
	    summary = (
	        "A social media feed ranking environment where AI agents learn to "
	        "personalize content feeds. Agents must balance user engagement (NDCG), "
	        "content quality, and platform policies across three difficulty levels."
	    )
	    payload = {
	        "name": "feed-ranking",
	        "description": summary,
	        "version": "1.0.0",
	        "author": "DevanshuDon",
	        "tasks": ["easy", "medium", "hard"],
	    }
	    return payload


	# --- 2. /schema endpoint ---
	@app.get("/schema")
	def schema():
	    """Return JSON-schema style descriptions of action, observation and state."""
	    # What the agent submits to /step.
	    action_schema = {
	        "type": "object",
	        "properties": {
	            "ranked_post_ids": {
	                "type": "array",
	                "items": {"type": "string"},
	                "description": "List of 10 post IDs in ranked order (best first)",
	            }
	        },
	        "required": ["ranked_post_ids"],
	    }

	    # What the environment hands back as the observation.
	    observation_schema = {
	        "type": "object",
	        "properties": {
	            "task": {"type": "string", "description": "Current task name"},
	            "description": {"type": "string", "description": "Task description"},
	            "user": {
	                "type": "object",
	                "description": "User profile with id, archetype, interests, and history",
	            },
	            "candidate_posts": {
	                "type": "array",
	                "items": {"type": "object"},
	                "description": "List of candidate posts to rank",
	            },
	            "action_required": {"type": "string", "description": "What the agent must return"},
	        },
	    }

	    # Shape of the /state snapshot.
	    state_schema = {
	        "type": "object",
	        "properties": {
	            "current_task": {"type": "string"},
	            "current_user_id": {"type": "string"},
	            "num_candidates": {"type": "integer"},
	            "episode_done": {"type": "boolean"},
	            "steps_taken": {"type": "integer"},
	            "total_score": {"type": "number"},
	        },
	    }

	    return {
	        "action": action_schema,
	        "observation": observation_schema,
	        "state": state_schema,
	    }


	# --- 3. /mcp endpoint (JSON-RPC) ---
	@app.post("/mcp")
	async def mcp_endpoint(request_body: dict = {}):
	    """Answer a JSON-RPC request on the MCP endpoint.

	    ``initialize`` returns protocol and server info, ``tools/list`` returns
	    the reset/step/state tool descriptors, and every other method gets an
	    empty result. The request ``id`` is echoed back (1 when absent). The
	    default body is only read here, never mutated.
	    """
	    rpc_method = request_body.get("method", "")
	    rpc_id = request_body.get("id", 1)

	    if rpc_method == "initialize":
	        result = {
	            "protocolVersion": "2024-11-05",
	            "serverInfo": {
	                "name": "feed-ranking",
	                "version": "1.0.0",
	            },
	            "capabilities": {
	                "tools": {"listChanged": False},
	            },
	        }
	    elif rpc_method == "tools/list":
	        reset_tool = {
	            "name": "reset",
	            "description": "Start a new episode with a task (easy/medium/hard)",
	            "inputSchema": {
	                "type": "object",
	                "properties": {
	                    "task": {"type": "string", "enum": ["easy", "medium", "hard"]}
	                },
	            },
	        }
	        step_tool = {
	            "name": "step",
	            "description": "Submit a ranked list of post IDs",
	            "inputSchema": {
	                "type": "object",
	                "properties": {
	                    "ranked_post_ids": {
	                        "type": "array",
	                        "items": {"type": "string"},
	                    }
	                },
	                "required": ["ranked_post_ids"],
	            },
	        }
	        state_tool = {
	            "name": "state",
	            "description": "Get the current environment state",
	            "inputSchema": {"type": "object", "properties": {}},
	        }
	        result = {"tools": [reset_tool, step_tool, state_tool]}
	    else:
	        # Default response for any other method
	        result = {}

	    return {
	        "jsonrpc": "2.0",
	        "id": rpc_id,
	        "result": result,
	    }





	@app.get("/health")
	def health():
	    """Liveness probe: always reports a healthy status."""
	    status_payload = {"status": "healthy"}
	    return status_payload


	def main():
	    """Serve the app with uvicorn on all interfaces, port 8000."""
	    # Imported here so uvicorn is only needed when run as a script.
	    from uvicorn import run as serve

	    serve(app, host="0.0.0.0", port=8000)


	if __name__ == "__main__":
	    main()