File size: 7,886 Bytes
2f95553
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
"""

LLM helper module for OpenAI GPT integration.

Used for reasoning, OCR, and complex question parsing.

"""
import os
import logging
from typing import Optional, Dict, Any
import openai
from openai import AsyncOpenAI
import httpx

logger = logging.getLogger(__name__)

# Initialize OpenAI client
# Populated by initialize_llm() when OPENAI_API_KEY is set; otherwise stays None
# and callers fall back to OpenRouter.
client: Optional[AsyncOpenAI] = None

# OpenRouter configuration
# Read once at import time. OPENROUTER_API_KEY is None when unset, which
# disables the OpenRouter fallback path in ask_openrouter().
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
OPENROUTER_BASE_URL = os.getenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1")
OPENROUTER_MODEL = os.getenv("OPENROUTER_MODEL", "gpt-5-nano")
# Sent as the HTTP-Referer / X-Title attribution headers on OpenRouter requests.
OPENROUTER_SITE_URL = os.getenv("OPENROUTER_SITE_URL", "http://localhost")
OPENROUTER_APP_NAME = os.getenv("OPENROUTER_APP_NAME", "IITM LLM Quiz Solver")


def initialize_llm() -> None:
    """Set up the module-level OpenAI client from the OPENAI_API_KEY env var.

    When no OpenAI key is present, ``client`` stays ``None`` and a log line
    reports whether the OpenRouter fallback is available instead.
    """
    global client
    key = os.getenv("OPENAI_API_KEY")
    if not key:
        # No direct OpenAI access; report the state of the OpenRouter fallback.
        if OPENROUTER_API_KEY:
            logger.info("OPENAI_API_KEY not set, using OpenRouter only")
        else:
            logger.warning("No OPENAI_API_KEY or OPENROUTER_API_KEY set, LLM features will be disabled")
        return
    client = AsyncOpenAI(api_key=key)
    logger.info("OpenAI client initialized")


async def ask_gpt(prompt: str, model: str = "gpt-4o-mini", max_tokens: int = 2000) -> Optional[str]:
    """Query an OpenAI chat model, falling back to OpenRouter on failure.

    Args:
        prompt: The prompt/question to ask.
        model: OpenAI model to use (default: gpt-4o-mini).
        max_tokens: Maximum tokens in the response.

    Returns:
        Response text, or None if both OpenAI and the OpenRouter
        fallback are unavailable or fail.
    """
    # `client` is only read here, so no `global` declaration is needed.
    if client is None:
        logger.warning("OpenAI client not initialized, attempting OpenRouter fallback")
        return await ask_openrouter(prompt, max_tokens=max_tokens)

    try:
        response = await client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a helpful assistant that solves quiz questions accurately and concisely."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=max_tokens,
            temperature=0.3
        )
        answer = response.choices[0].message.content
        logger.info(f"GPT response received (model: {model})")
        return answer
    except Exception as e:
        logger.error(f"Error calling OpenAI API: {e}")
        # ask_openrouter already returns None on its own failures, so the
        # previous `if fallback: return fallback; return None` tail was
        # redundant — just return its result directly.
        return await ask_openrouter(prompt, max_tokens=max_tokens)


async def ask_openrouter(prompt: str, model: Optional[str] = None, max_tokens: int = 2000) -> Optional[str]:
    """Send a chat-completion request to OpenRouter.

    Args:
        prompt: Prompt text.
        model: Model identifier; defaults to OPENROUTER_MODEL when omitted.
        max_tokens: Maximum tokens in the response.

    Returns:
        The assistant's reply text, or None when the API key is missing
        or the HTTP request fails.
    """
    if not OPENROUTER_API_KEY:
        logger.warning("OPENROUTER_API_KEY not set, cannot call OpenRouter")
        return None

    chosen_model = model or OPENROUTER_MODEL

    endpoint = f"{OPENROUTER_BASE_URL.rstrip('/')}/chat/completions"
    request_headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "HTTP-Referer": OPENROUTER_SITE_URL,
        "X-Title": OPENROUTER_APP_NAME,
        "Content-Type": "application/json",
    }
    body = {
        "model": chosen_model,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant that solves quiz questions accurately and concisely."},
            {"role": "user", "content": prompt},
        ],
        "max_tokens": max_tokens,
        "temperature": 0.2,
    }

    try:
        async with httpx.AsyncClient(timeout=60) as http_client:
            resp = await http_client.post(endpoint, headers=request_headers, json=body)
            resp.raise_for_status()
            reply = resp.json()["choices"][0]["message"]["content"]
            logger.info(f"OpenRouter response received (model: {chosen_model})")
            return reply
    except Exception as e:
        logger.error(f"Error calling OpenRouter API: {e}")
        return None


async def parse_question_with_llm(question_text: str, context: str = "") -> Optional[Dict[str, Any]]:
    """Ask the LLM to classify a quiz question and describe its requirements.

    Args:
        question_text: The question text.
        context: Additional context from the page.

    Returns:
        A dict parsed from the model's JSON reply; ``{"raw_response": ...}``
        when no JSON could be extracted; None when the LLM call failed.
    """
    import json
    import re

    analysis_prompt = f"""Analyze this quiz question and provide a structured response:



Question: {question_text}



Context: {context}



Please identify:

1. What type of question is this? (scraping, calculation, API call, data analysis, etc.)

2. What data or resources are needed?

3. What is the expected answer format? (JSON, number, text, etc.)



Respond in JSON format:

{{

    "type": "question_type",

    "requirements": ["requirement1", "requirement2"],

    "answer_format": "format_type",

    "reasoning": "your reasoning"

}}

"""
    reply = await ask_gpt(analysis_prompt)
    if not reply:
        return None

    # Pull the first brace-balanced JSON object (one nesting level deep)
    # out of the free-text reply.
    match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', reply, re.DOTALL)
    if match:
        try:
            return json.loads(match.group())
        except json.JSONDecodeError:
            pass

    # No parseable JSON: hand the caller the raw model output instead.
    return {"raw_response": reply}


async def solve_with_llm(question: str, available_data: Dict[str, Any]) -> Optional[str]:
    """Ask the LLM to answer a quiz question given page-extracted data.

    Args:
        question: The question text.
        available_data: Any data extracted from the page.

    Returns:
        The model's answer text, or None when the LLM call failed.
    """
    query = f"""Solve this quiz question:



Question: {question}



Available Data:

{available_data}



Provide a clear, concise answer. If the answer should be in JSON format, provide valid JSON.

If it's a calculation, show your work briefly.

"""
    # Larger token budget than the ask_gpt default to leave room for worked answers.
    return await ask_gpt(query, max_tokens=3000)


async def ocr_image_with_llm(image_base64: str) -> Optional[str]:
    """Extract text from a base64-encoded image via a vision-capable model.

    Tries gpt-4o first, then gpt-4-vision-preview, returning the first
    successful transcription. Requires the module client to be initialized
    (gpt-4o-mini does not support vision).

    Args:
        image_base64: Base64 encoded image.

    Returns:
        Extracted text, or None if the client is unavailable or every
        vision model attempt failed.
    """
    # `client` is only read, so no `global` declaration is required.
    if client is None:
        return None

    for candidate in ("gpt-4o", "gpt-4-vision-preview"):
        try:
            result = await client.chat.completions.create(
                model=candidate,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": "Extract all text from this image. Return only the text content."},
                            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}}
                        ]
                    }
                ],
                max_tokens=1000
            )
        except Exception as e:
            # This model failed (likely no vision support or API error); try the next.
            logger.warning(f"Error with model {candidate}: {e}")
            continue
        return result.choices[0].message.content

    logger.error("No vision-capable model available")
    return None