|
|
"""
|
|
|
LLM helper module for OpenAI GPT integration.
|
|
|
Used for reasoning, OCR, and complex question parsing.
|
|
|
"""
|
|
|
import os
|
|
|
import logging
|
|
|
from typing import Optional, Dict, Any
|
|
|
import openai
|
|
|
from openai import AsyncOpenAI
|
|
|
import httpx
|
|
|
|
|
|
logger = logging.getLogger(__name__)

# Shared async OpenAI client; stays None until initialize_llm() finds
# OPENAI_API_KEY in the environment.
client: Optional[AsyncOpenAI] = None

# OpenRouter fallback configuration, read once at import time.
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
# Base URL of OpenRouter's OpenAI-compatible REST API.
OPENROUTER_BASE_URL = os.getenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1")
# Default model slug used when a caller does not pass one explicitly.
OPENROUTER_MODEL = os.getenv("OPENROUTER_MODEL", "gpt-5-nano")
# Sent as the HTTP-Referer header — OpenRouter uses it to attribute requests.
OPENROUTER_SITE_URL = os.getenv("OPENROUTER_SITE_URL", "http://localhost")
# Sent as the X-Title header — human-readable app name shown by OpenRouter.
OPENROUTER_APP_NAME = os.getenv("OPENROUTER_APP_NAME", "IITM LLM Quiz Solver")
|
|
|
|
|
|
|
|
|
def initialize_llm() -> None:
    """
    Initialize the module-level OpenAI client from the environment.

    Reads OPENAI_API_KEY; when absent, logs whether the OpenRouter
    fallback is available instead. Safe to call more than once.
    """
    global client

    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        # No direct OpenAI access — report which fallback path applies.
        if OPENROUTER_API_KEY:
            logger.info("OPENAI_API_KEY not set, using OpenRouter only")
        else:
            logger.warning("No OPENAI_API_KEY or OPENROUTER_API_KEY set, LLM features will be disabled")
        return

    client = AsyncOpenAI(api_key=api_key)
    logger.info("OpenAI client initialized")
|
|
|
|
|
|
|
|
|
async def ask_gpt(prompt: str, model: str = "gpt-4o-mini", max_tokens: int = 2000) -> Optional[str]:
    """
    Query an OpenAI GPT model, falling back to OpenRouter on failure.

    Args:
        prompt: The prompt/question to ask
        model: Model to use (default: gpt-4o-mini)
        max_tokens: Maximum tokens in response

    Returns:
        Response text, or None if both OpenAI and the OpenRouter
        fallback are unavailable or error out.
    """
    global client

    if not client:
        # No OpenAI credentials configured — go straight to OpenRouter.
        logger.warning("OpenAI client not initialized, attempting OpenRouter fallback")
        return await ask_openrouter(prompt, max_tokens=max_tokens)

    try:
        response = await client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a helpful assistant that solves quiz questions accurately and concisely."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=max_tokens,
            # Low temperature: quiz answers should be stable, not creative.
            temperature=0.3
        )
    except Exception as e:
        logger.error("Error calling OpenAI API: %s", e)
        # ask_openrouter returns None on its own failures, so its result
        # can be returned directly (no extra None-check needed).
        return await ask_openrouter(prompt, max_tokens=max_tokens)

    answer = response.choices[0].message.content
    logger.info("GPT response received (model: %s)", model)
    return answer
|
|
|
|
|
|
|
|
|
async def ask_openrouter(prompt: str, model: Optional[str] = None, max_tokens: int = 2000) -> Optional[str]:
    """
    Send a prompt to OpenRouter's OpenAI-compatible chat endpoint.

    Args:
        prompt: Prompt text to submit
        model: Override model slug (defaults to OPENROUTER_MODEL)
        max_tokens: Cap on tokens in the completion

    Returns:
        The assistant message text, or None when the API key is
        missing or the request fails.
    """
    if not OPENROUTER_API_KEY:
        logger.warning("OPENROUTER_API_KEY not set, cannot call OpenRouter")
        return None

    chosen = model or OPENROUTER_MODEL
    endpoint = f"{OPENROUTER_BASE_URL.rstrip('/')}/chat/completions"

    request_headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "HTTP-Referer": OPENROUTER_SITE_URL,
        "X-Title": OPENROUTER_APP_NAME,
        "Content-Type": "application/json",
    }
    body = {
        "model": chosen,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant that solves quiz questions accurately and concisely."},
            {"role": "user", "content": prompt}
        ],
        "max_tokens": max_tokens,
        "temperature": 0.2
    }

    try:
        async with httpx.AsyncClient(timeout=60) as http:
            resp = await http.post(endpoint, headers=request_headers, json=body)
            resp.raise_for_status()
            payload = resp.json()
        text = payload["choices"][0]["message"]["content"]
        logger.info(f"OpenRouter response received (model: {chosen})")
        return text
    except Exception as e:
        logger.error(f"Error calling OpenRouter API: {e}")
        return None
|
|
|
|
|
|
|
|
|
async def parse_question_with_llm(question_text: str, context: str = "") -> Optional[Dict[str, Any]]:
    """
    Ask the LLM to classify a quiz question and describe its requirements.

    Args:
        question_text: The question text
        context: Additional context from the page

    Returns:
        A dict parsed from the model's JSON reply, a
        {"raw_response": ...} dict when no JSON could be extracted,
        or None when the LLM is unreachable.
    """
    prompt = f"""Analyze this quiz question and provide a structured response:

Question: {question_text}

Context: {context}

Please identify:
1. What type of question is this? (scraping, calculation, API call, data analysis, etc.)
2. What data or resources are needed?
3. What is the expected answer format? (JSON, number, text, etc.)

Respond in JSON format:
{{
"type": "question_type",
"requirements": ["requirement1", "requirement2"],
"answer_format": "format_type",
"reasoning": "your reasoning"
}}
"""

    reply = await ask_gpt(prompt)
    if not reply:
        return None

    import json
    import re

    # Pull out the first JSON-object-looking span (handles one level of
    # nested braces); fall back to the raw text if it doesn't parse.
    found = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', reply, re.DOTALL)
    if found is not None:
        try:
            return json.loads(found.group())
        except json.JSONDecodeError:
            pass

    return {"raw_response": reply}
|
|
|
|
|
|
|
|
|
async def solve_with_llm(question: str, available_data: Dict[str, Any]) -> Optional[str]:
    """
    Ask the LLM to answer a quiz question given extracted page data.

    Args:
        question: The question text
        available_data: Data extracted from the page; interpolated
            verbatim (repr) into the prompt

    Returns:
        The model's answer text, or None on failure.
    """
    solver_prompt = f"""Solve this quiz question:

Question: {question}

Available Data:
{available_data}

Provide a clear, concise answer. If the answer should be in JSON format, provide valid JSON.
If it's a calculation, show your work briefly.
"""
    # Larger token budget than the default: answers may include worked steps.
    return await ask_gpt(solver_prompt, max_tokens=3000)
|
|
|
|
|
|
|
|
|
async def ocr_image_with_llm(image_base64: str) -> Optional[str]:
    """
    Extract text from a base64-encoded image via a vision-capable model.

    Tries gpt-4o first, then gpt-4-vision-preview. gpt-4o-mini is not
    attempted because it does not support vision input.

    Args:
        image_base64: Base64 encoded image (sent as a PNG data URL)

    Returns:
        Extracted text, or None when no client or vision model is available.
    """
    global client

    if not client:
        return None

    # Same message payload for every candidate model.
    ocr_messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Extract all text from this image. Return only the text content."},
                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}}
            ]
        }
    ]

    for candidate in ("gpt-4o", "gpt-4-vision-preview"):
        try:
            result = await client.chat.completions.create(
                model=candidate,
                messages=ocr_messages,
                max_tokens=1000
            )
            return result.choices[0].message.content
        except Exception as e:
            # Model may be unavailable on this account — try the next one.
            logger.warning(f"Error with model {candidate}: {e}")
            continue

    logger.error("No vision-capable model available")
    return None
|
|
|
|
|
|
|