File size: 3,556 Bytes
b10bcbc 4735086 1444e6f b10bcbc 4735086 6416677 b10bcbc 1444e6f b10bcbc 48f5789 b10bcbc 9fd990f b10bcbc 4735086 0d22fa6 4735086 0d22fa6 4735086 0d22fa6 4735086 0d22fa6 4735086 0d22fa6 4735086 0d22fa6 4735086 0d22fa6 4735086 48f5789 4735086 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
import os
from openai import OpenAI
from pydantic import BaseModel, Field
from typing import List, Optional, Any
from app.schema.models import QuizOutput, QuizQuestion
from app.config import settings
from openai import AsyncOpenAI
from typing import List
client = AsyncOpenAI(
base_url="https://api.groq.com/openai/v1",
api_key=settings.GROQ_API_KEY
)
async def call_llm(prompt:str):
try:
response = await client.chat.completions.create(
# CRUCIAL: Use the LiteLLM format: 'gemini/gemini-2.5-pro'
model="openai/gpt-oss-120b",
messages=[
{"role": "user", "content": prompt}
],
# Use the OpenAI parameter to request JSON output
response_format={"type": "json_object"},
temperature=0.4,
)
json_string = response.choices[0].message.content
import json
quiz_data = json.loads(json_string)
wrapped_data = {"quiz": quiz_data}
return QuizOutput.model_validate(wrapped_data)
except Exception as e:
print(f"Error calling LiteLLM/Gemini: {e}")
raise e
async def stream_chat(messages: List[dict], context: str, retrieved_docs: str | None):
system_instruction = {
"role": "system",
"content": "You are a helpful AI assistant. Answer the user's question based on the provided context and retrieved documents."
}
conversation_history = [msg.copy() for msg in messages]
if conversation_history and conversation_history[-1]['role'] == 'user':
last_user_msg = conversation_history[-1]
original_question = last_user_msg['content']
# Start constructing the augmented prompt
augmented_content = ""
# 1. Add Manual Context
if context:
augmented_content += (
f"Here is the context/notes you must use:\n"
f"---------------------\n"
f"{context}\n"
f"---------------------\n\n"
)
# 2. Add Retrieved Documents (New Logic)
if retrieved_docs:
augmented_content += (
f"Here is background information/retrieved documents:\n"
f"---------------------\n"
f"{retrieved_docs}\n"
f"---------------------\n\n"
)
# 3. Add the User Question
augmented_content += f"User Question: {original_question}"
# Update the message content
last_user_msg['content'] = augmented_content
else:
# Fallback: If for some reason there is no user message, add one.
# We combine context and docs here too just in case.
combined_context = f"{context}\n\n{retrieved_docs or ''}"
conversation_history.append({
"role": "user",
"content": f"Context:\n{combined_context}\n\nPlease analyze this."
})
# 4. Combine System + Modified User History
full_history = [system_instruction] + conversation_history
try:
# Ensure 'client' is initialized before this function in your code
stream = await client.chat.completions.create(
model="openai/gpt-oss-120b",
messages=full_history,
temperature=0.7,
stream=True
)
async for chunk in stream:
if chunk.choices[0].delta.content:
yield chunk.choices[0].delta.content
except Exception as e:
print(f"Error in chat stream: {e}")
yield f"Error: {str(e)}" |