# Prj2 / llm.py
"""
LLM helper module for OpenAI GPT integration.
Used for reasoning, OCR, and complex question parsing.
"""
import os
import logging
from typing import Optional, Dict, Any
import openai
from openai import AsyncOpenAI
import httpx
logger = logging.getLogger(__name__)
# Initialize OpenAI client
# Module-level async client; populated by initialize_llm() when OPENAI_API_KEY
# is present, otherwise left as None and calls fall back to OpenRouter.
client: Optional[AsyncOpenAI] = None
# OpenRouter configuration
# API key for the OpenRouter fallback; when unset, ask_openrouter() logs and returns None.
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
# Base URL of OpenRouter's OpenAI-compatible REST API (chat/completions is appended).
OPENROUTER_BASE_URL = os.getenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1")
# Default model name used when ask_openrouter() is called without an explicit model.
OPENROUTER_MODEL = os.getenv("OPENROUTER_MODEL", "gpt-5-nano")
# Sent as the HTTP-Referer header on OpenRouter requests (see ask_openrouter).
OPENROUTER_SITE_URL = os.getenv("OPENROUTER_SITE_URL", "http://localhost")
# Sent as the X-Title header on OpenRouter requests (see ask_openrouter).
OPENROUTER_APP_NAME = os.getenv("OPENROUTER_APP_NAME", "IITM LLM Quiz Solver")
def initialize_llm() -> None:
    """Populate the module-level OpenAI client from the OPENAI_API_KEY env var.

    When no OpenAI key is configured, only logs whether the OpenRouter
    fallback is usable; never raises.
    """
    global client
    openai_key = os.getenv("OPENAI_API_KEY")
    if not openai_key:
        # No direct OpenAI access -- report whether the OpenRouter
        # fallback key is available instead.
        if OPENROUTER_API_KEY:
            logger.info("OPENAI_API_KEY not set, using OpenRouter only")
        else:
            logger.warning("No OPENAI_API_KEY or OPENROUTER_API_KEY set, LLM features will be disabled")
        return
    client = AsyncOpenAI(api_key=openai_key)
    logger.info("OpenAI client initialized")
async def ask_gpt(prompt: str, model: str = "gpt-4o-mini", max_tokens: int = 2000) -> Optional[str]:
    """Send a prompt to an OpenAI chat model and return the reply text.

    Args:
        prompt: The prompt/question to ask.
        model: Chat-completion model name (default: gpt-4o-mini).
        max_tokens: Upper bound on tokens in the reply.

    Returns:
        The model's reply, or None when both OpenAI and the OpenRouter
        fallback are unavailable or fail.
    """
    global client
    if client is None:
        # No OpenAI client was initialized -- go straight to OpenRouter.
        logger.warning("OpenAI client not initialized, attempting OpenRouter fallback")
        return await ask_openrouter(prompt, max_tokens=max_tokens)
    try:
        completion = await client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a helpful assistant that solves quiz questions accurately and concisely."},
                {"role": "user", "content": prompt},
            ],
            max_tokens=max_tokens,
            temperature=0.3,
        )
    except Exception as exc:
        logger.error(f"Error calling OpenAI API: {exc}")
        # Best-effort fallback to OpenRouter if it is configured.
        fallback = await ask_openrouter(prompt, max_tokens=max_tokens)
        return fallback if fallback else None
    logger.info(f"GPT response received (model: {model})")
    return completion.choices[0].message.content
async def ask_openrouter(prompt: str, model: Optional[str] = None, max_tokens: int = 2000) -> Optional[str]:
    """Send a prompt to OpenRouter's OpenAI-compatible chat endpoint.

    Args:
        prompt: Prompt text.
        model: Model identifier; defaults to OPENROUTER_MODEL when omitted.
        max_tokens: Upper bound on tokens in the reply.

    Returns:
        The reply text, or None when the key is missing or the request fails.
    """
    if not OPENROUTER_API_KEY:
        logger.warning("OPENROUTER_API_KEY not set, cannot call OpenRouter")
        return None

    chosen_model = model or OPENROUTER_MODEL
    endpoint = f"{OPENROUTER_BASE_URL.rstrip('/')}/chat/completions"
    # OpenRouter reads HTTP-Referer / X-Title to attribute the calling app.
    request_headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "HTTP-Referer": OPENROUTER_SITE_URL,
        "X-Title": OPENROUTER_APP_NAME,
        "Content-Type": "application/json",
    }
    body = {
        "model": chosen_model,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant that solves quiz questions accurately and concisely."},
            {"role": "user", "content": prompt},
        ],
        "max_tokens": max_tokens,
        "temperature": 0.2,
    }
    try:
        async with httpx.AsyncClient(timeout=60) as session:
            reply = await session.post(endpoint, headers=request_headers, json=body)
            reply.raise_for_status()
            payload = reply.json()
            text = payload["choices"][0]["message"]["content"]
            logger.info(f"OpenRouter response received (model: {chosen_model})")
            return text
    except Exception as exc:
        logger.error(f"Error calling OpenRouter API: {exc}")
        return None
async def parse_question_with_llm(question_text: str, context: str = "") -> Optional[Dict[str, Any]]:
    """Ask the LLM to classify a quiz question and describe what solving it needs.

    Args:
        question_text: The question text.
        context: Additional context from the page.

    Returns:
        A dict parsed from the model's JSON reply, a {"raw_response": ...}
        wrapper when no JSON could be extracted, or None when the LLM call
        itself failed.
    """
    import json
    import re

    prompt = f"""Analyze this quiz question and provide a structured response:
Question: {question_text}
Context: {context}
Please identify:
1. What type of question is this? (scraping, calculation, API call, data analysis, etc.)
2. What data or resources are needed?
3. What is the expected answer format? (JSON, number, text, etc.)
Respond in JSON format:
{{
"type": "question_type",
"requirements": ["requirement1", "requirement2"],
"answer_format": "format_type",
"reasoning": "your reasoning"
}}
"""
    reply = await ask_gpt(prompt)
    if not reply:
        return None
    # Pull the first JSON-object-shaped span (up to one nesting level) out of
    # the reply; LLMs often wrap the JSON in surrounding prose.
    candidate = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', reply, re.DOTALL)
    if candidate:
        try:
            return json.loads(candidate.group())
        except json.JSONDecodeError:
            pass
    # No parseable JSON -- hand the caller the raw text instead.
    return {"raw_response": reply}
async def solve_with_llm(question: str, available_data: Dict[str, Any]) -> Optional[str]:
    """Have the LLM answer a quiz question using data gathered from the page.

    Args:
        question: The question text.
        available_data: Any data extracted from the page.

    Returns:
        The model's answer text, or None when the LLM call failed.
    """
    prompt = f"""Solve this quiz question:
Question: {question}
Available Data:
{available_data}
Provide a clear, concise answer. If the answer should be in JSON format, provide valid JSON.
If it's a calculation, show your work briefly.
"""
    # Larger token budget than the default: answers may include brief
    # worked calculations.
    return await ask_gpt(prompt, max_tokens=3000)
async def ocr_image_with_llm(image_base64: str) -> Optional[str]:
    """Extract text from a base64-encoded image via a vision-capable GPT model.

    Tries gpt-4o first, then gpt-4-vision-preview; gpt-4o-mini is skipped
    because it does not support vision input.

    Args:
        image_base64: Base64 encoded image.

    Returns:
        Extracted text, or None when no client is configured or every
        vision model attempt fails.
    """
    global client
    if client is None:
        return None
    # Build the multimodal message once; it is identical for every attempt.
    user_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": "Extract all text from this image. Return only the text content."},
            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}},
        ],
    }
    for candidate in ("gpt-4o", "gpt-4-vision-preview"):
        try:
            result = await client.chat.completions.create(
                model=candidate,
                messages=[user_message],
                max_tokens=1000,
            )
            return result.choices[0].message.content
        except Exception as exc:
            # The model may not be enabled for this account -- try the next one.
            logger.warning(f"Error with model {candidate}: {exc}")
            continue
    logger.error("No vision-capable model available")
    return None