"""
LLM helper module for OpenAI GPT integration.
Used for reasoning, OCR, and complex question parsing.
"""
import os
import logging
from typing import Optional, Dict, Any
import openai
from openai import AsyncOpenAI
import httpx
logger = logging.getLogger(__name__)
# Initialize OpenAI client
client: Optional[AsyncOpenAI] = None
# OpenRouter configuration
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
OPENROUTER_BASE_URL = os.getenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1")
OPENROUTER_MODEL = os.getenv("OPENROUTER_MODEL", "gpt-5-nano")
OPENROUTER_SITE_URL = os.getenv("OPENROUTER_SITE_URL", "http://localhost")
OPENROUTER_APP_NAME = os.getenv("OPENROUTER_APP_NAME", "IITM LLM Quiz Solver")
def initialize_llm() -> None:
    """
    Initialize the module-level OpenAI client from the environment.

    Reads OPENAI_API_KEY; when present, constructs the AsyncOpenAI
    client. When absent, logs whether OpenRouter is configured to act
    as the sole backend or whether LLM features are disabled entirely.
    """
    global client
    api_key = os.getenv("OPENAI_API_KEY")
    if api_key:
        client = AsyncOpenAI(api_key=api_key)
        logger.info("OpenAI client initialized")
        return
    # No OpenAI key — OpenRouter (if configured) becomes the only backend.
    if OPENROUTER_API_KEY:
        logger.info("OPENAI_API_KEY not set, using OpenRouter only")
    else:
        logger.warning("No OPENAI_API_KEY or OPENROUTER_API_KEY set, LLM features will be disabled")
async def ask_gpt(prompt: str, model: str = "gpt-4o-mini", max_tokens: int = 2000) -> Optional[str]:
    """
    Query an OpenAI chat model with a single user prompt.

    Falls back to OpenRouter when the OpenAI client is unavailable or
    the API call fails.

    Args:
        prompt: The prompt/question to ask
        model: Model to use (default: gpt-4o-mini)
        max_tokens: Maximum tokens in response

    Returns:
        Response text or None if error
    """
    global client
    if not client:
        # No OpenAI client configured — route straight to OpenRouter.
        logger.warning("OpenAI client not initialized, attempting OpenRouter fallback")
        return await ask_openrouter(prompt, max_tokens=max_tokens)
    try:
        completion = await client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a helpful assistant that solves quiz questions accurately and concisely."},
                {"role": "user", "content": prompt},
            ],
            max_tokens=max_tokens,
            temperature=0.3,
        )
        answer = completion.choices[0].message.content
    except Exception as exc:
        logger.error(f"Error calling OpenAI API: {exc}")
        # Primary backend failed — try OpenRouter before giving up.
        fallback = await ask_openrouter(prompt, max_tokens=max_tokens)
        return fallback if fallback else None
    logger.info(f"GPT response received (model: {model})")
    return answer
async def ask_openrouter(prompt: str, model: Optional[str] = None, max_tokens: int = 2000) -> Optional[str]:
    """
    Send a chat-completion request to OpenRouter (e.g., GPT-5-nano).

    Args:
        prompt: Prompt text
        model: Model to use (defaults to OPENROUTER_MODEL)
        max_tokens: Maximum tokens

    Returns:
        Response text or None
    """
    if not OPENROUTER_API_KEY:
        logger.warning("OPENROUTER_API_KEY not set, cannot call OpenRouter")
        return None

    chosen_model = model if model else OPENROUTER_MODEL
    endpoint = f"{OPENROUTER_BASE_URL.rstrip('/')}/chat/completions"
    # OpenRouter requires the bearer key; referer/title identify the app.
    request_headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "HTTP-Referer": OPENROUTER_SITE_URL,
        "X-Title": OPENROUTER_APP_NAME,
        "Content-Type": "application/json",
    }
    body = {
        "model": chosen_model,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant that solves quiz questions accurately and concisely."},
            {"role": "user", "content": prompt},
        ],
        "max_tokens": max_tokens,
        "temperature": 0.2,
    }
    try:
        async with httpx.AsyncClient(timeout=60) as http_client:
            resp = await http_client.post(endpoint, headers=request_headers, json=body)
            resp.raise_for_status()
            # Index inside try: a malformed payload is treated as an API error.
            answer = resp.json()["choices"][0]["message"]["content"]
    except Exception as exc:
        logger.error(f"Error calling OpenRouter API: {exc}")
        return None
    logger.info(f"OpenRouter response received (model: {chosen_model})")
    return answer
async def parse_question_with_llm(question_text: str, context: str = "") -> Optional[Dict[str, Any]]:
    """
    Use LLM to parse and understand a quiz question.

    Args:
        question_text: The question text
        context: Additional context from the page

    Returns:
        Parsed question structure (dict) on success, a
        ``{"raw_response": ...}`` dict when the LLM reply could not be
        parsed as JSON, or None when the LLM returned nothing.
    """
    prompt = f"""Analyze this quiz question and provide a structured response:
Question: {question_text}
Context: {context}
Please identify:
1. What type of question is this? (scraping, calculation, API call, data analysis, etc.)
2. What data or resources are needed?
3. What is the expected answer format? (JSON, number, text, etc.)
Respond in JSON format:
{{
"type": "question_type",
"requirements": ["requirement1", "requirement2"],
"answer_format": "format_type",
"reasoning": "your reasoning"
}}
"""
    response = await ask_gpt(prompt)
    if not response:
        return None
    parsed = _extract_json_object(response)
    if parsed is not None:
        return parsed
    # Could not recover structured data — hand back the raw text.
    return {"raw_response": response}


def _extract_json_object(text: str) -> Optional[Dict[str, Any]]:
    """Best-effort extraction of the first JSON object embedded in *text*."""
    import json
    import re
    # Fast path: the whole response is already a valid JSON object
    # (covers models that return bare JSON with no surrounding prose).
    try:
        whole = json.loads(text)
        if isinstance(whole, dict):
            return whole
    except json.JSONDecodeError:
        pass
    # Fallback: first brace-delimited span (handles one nesting level),
    # e.g. JSON embedded in prose or inside ```json fences.
    match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', text, re.DOTALL)
    if match:
        try:
            candidate = json.loads(match.group())
            if isinstance(candidate, dict):
                return candidate
        except json.JSONDecodeError:
            pass
    return None
async def solve_with_llm(question: str, available_data: Dict[str, Any]) -> Optional[str]:
    """
    Ask the LLM to produce an answer for a quiz question.

    Args:
        question: The question text
        available_data: Any data extracted from the page

    Returns:
        Answer or None
    """
    solver_prompt = f"""Solve this quiz question:
Question: {question}
Available Data:
{available_data}
Provide a clear, concise answer. If the answer should be in JSON format, provide valid JSON.
If it's a calculation, show your work briefly.
"""
    # Larger token budget than the default: answers may include worked steps.
    answer = await ask_gpt(solver_prompt, max_tokens=3000)
    return answer
async def ocr_image_with_llm(image_base64: str) -> Optional[str]:
    """
    Extract text from an image using a GPT-4 vision-capable model.

    Note: Requires GPT-4 Vision model (gpt-4o or gpt-4-vision-preview).
    gpt-4o-mini does not support vision.

    Args:
        image_base64: Base64 encoded image

    Returns:
        Extracted text or None
    """
    global client
    if not client:
        return None
    data_url = f"data:image/png;base64,{image_base64}"
    # Try each vision-capable model in preference order; first success wins.
    for candidate in ("gpt-4o", "gpt-4-vision-preview"):
        try:
            result = await client.chat.completions.create(
                model=candidate,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": "Extract all text from this image. Return only the text content."},
                            {"type": "image_url", "image_url": {"url": data_url}},
                        ],
                    }
                ],
                max_tokens=1000,
            )
        except Exception as exc:
            logger.warning(f"Error with model {candidate}: {exc}")
            continue
        return result.choices[0].message.content
    logger.error("No vision-capable model available")
    return None