"""Streaming LLM clients for PyMongo query generation.

Provides two async clients — one for the OpenAI API and one for OpenRouter —
that stream tokens to stdout as they arrive and return the full response text.
"""

import asyncio
import json
import os
import re
import sys

import httpx
from dotenv import load_dotenv
from openai import AsyncOpenAI

load_dotenv()

# Module-level client shared by CodeGenerationLLM; generous timeout for
# long streaming generations.
client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"), timeout=240.0)


class CodeGenerationLLM:
    """Streams JSON-formatted PyMongo-query responses from the OpenAI API."""

    async def generate_response(self, prompt: str) -> str:
        """Stream a chat completion for *prompt*, echoing tokens to stdout.

        The model is instructed to emit a JSON object
        (``response_format={"type": "json_object"}``).

        Returns:
            The concatenated response text, stripped of surrounding whitespace.
        """
        tokens: list[str] = []
        response = await client.chat.completions.create(
            model="gpt-5-chat-latest",
            temperature=0.1,
            messages=[
                {"role": "system", "content": "You are an expert in PyMongo query generation with valid JSON format."},
                {"role": "user", "content": prompt},
            ],
            stream=True,
            # NOTE(review): newer OpenAI models may require
            # ``max_completion_tokens`` instead of ``max_tokens`` — confirm
            # against the model's API docs before switching models.
            max_tokens=2500,
            response_format={"type": "json_object"},
        )
        async for chunk in response:
            delta = chunk.choices[0].delta
            # delta.content is None on role/finish chunks; skip those.
            if delta and delta.content:
                token = delta.content
                print(token, end="", flush=True)
                tokens.append(token)
        return "".join(tokens).strip()

    async def generate_final_output(self, prompt) -> str:
        """Thin wrapper around :meth:`generate_response` (kept for API compatibility)."""
        return await self.generate_response(prompt)


class OpenRouterClient:
    """Streams chat completions from the OpenRouter API via server-sent events."""

    def __init__(self):
        load_dotenv()
        self.api_key = os.getenv("OPENROUTER_API_KEY")
        self.url = "https://openrouter.ai/api/v1/chat/completions"
        self.headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

    async def stream_chat(self, prompt):
        """Stream a completion for *prompt*, echoing tokens to stdout.

        Parses the SSE stream line by line and returns the concatenated
        response text.
        """
        payload = {
            # alternatives: anthropic/claude-sonnet-4.6, mistralai/codestral-2508
            "model": "mistralai/codestral-2508",
            "stream": True,
            "max_tokens": 3000,
            "temperature": 0.1,
            "messages": [
                {"role": "system", "content": "You are an expert in PyMongo query generation with valid JSON format."},
                {"role": "user", "content": prompt},
            ],
        }
        # Accumulate chunks in a list and join once — avoids quadratic
        # string concatenation on long responses.
        parts: list[str] = []
        # Renamed from ``client`` to avoid shadowing the module-level OpenAI client.
        async with httpx.AsyncClient(timeout=60) as http:
            async with http.stream(
                "POST", self.url, headers=self.headers, json=payload
            ) as r:
                async for line in r.aiter_lines():
                    if not line or not line.startswith("data:"):
                        continue
                    # BUGFIX: strip the bare "data:" prefix. SSE does not
                    # guarantee a space after the colon, and the guard above
                    # only checks for "data:" — removeprefix("data: ") would
                    # leave the prefix intact and break json.loads below.
                    data = line.removeprefix("data:").strip()
                    if data == "[DONE]":
                        break
                    chunk = json.loads(data)
                    delta = chunk["choices"][0]["delta"].get("content")
                    if delta:
                        print(delta, end="", flush=True)
                        parts.append(delta)
        return "".join(parts)