File size: 2,990 Bytes
01e9350
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import os, re, sys, json,httpx
from openai import AsyncOpenAI
from dotenv import load_dotenv
import asyncio
load_dotenv()

# Module-level async OpenAI client shared by CodeGenerationLLM below.
# The 240 s timeout accommodates long streamed completions.
# NOTE(review): api_key will be None if OPENAI_API_KEY is unset in the
# environment/.env; the first request would then fail — confirm deployment env.
client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"),
                     timeout=240.0)

class CodeGenerationLLM:
    """Streams chat completions from the module-level OpenAI `client`,
    prompting for PyMongo queries in JSON-object format."""

    async def generate_response(self, prompt: str) -> str:
        """Stream a completion for `prompt`, echoing tokens to stdout.

        The model is forced into JSON output via `response_format`.

        Args:
            prompt: User message sent alongside the fixed system prompt.

        Returns:
            The full streamed text, whitespace-stripped.
        """
        full_text: list[str] = []
        response = await client.chat.completions.create(
            model="gpt-5-chat-latest",
            temperature=0.1,
            messages=[
                {"role": "system", "content": "You are an expert in PyMongo query generation with valid JSON format."},
                {"role": "user", "content": prompt}
            ],
            stream=True,
            max_tokens=2500,
            #max_completion_tokens=3800
            response_format={"type": "json_object"}
        )
        async for chunk in response:
            # Robustness: some stream frames can carry an empty `choices`
            # list (e.g. trailing usage frames); indexing blindly would
            # raise IndexError and lose the text collected so far.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta
            if delta and delta.content:
                token = delta.content
                print(token, end="", flush=True)
                full_text.append(token)
        return "".join(full_text).strip()

    async def generate_final_output(self, prompt) -> str:
        """Thin wrapper kept for callers using the two-step API."""
        llm_output = await self.generate_response(prompt)
        return llm_output


class OpenRouterClient:
    """Minimal async client for OpenRouter's chat-completions SSE endpoint."""

    def __init__(self):
        # Re-loading .env here keeps the class usable standalone.
        load_dotenv()

        # NOTE(review): api_key may be None if OPENROUTER_API_KEY is unset;
        # requests would then fail with 401 at call time.
        self.api_key = os.getenv("OPENROUTER_API_KEY")

        self.url = "https://openrouter.ai/api/v1/chat/completions"

        self.headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

    async def stream_chat(self, prompt):
        """Stream a completion for `prompt`, echoing tokens to stdout.

        Args:
            prompt: User message sent alongside the fixed system prompt.

        Returns:
            The concatenated streamed response text.

        Raises:
            httpx.HTTPStatusError: on a non-2xx response.
        """
        payload = {
            "model": "mistralai/codestral-2508", # anthropic/claude-sonnet-4.6 ,mistralai/codestral-2508
            "stream": True,
            "max_tokens": 3000,
            "temperature": 0.1,
            "messages": [
                {"role": "system", "content": "You are an expert in PyMongo query generation with valid JSON format."},
                {"role": "user", "content": prompt}
            ]
        }

        # Accumulate chunks in a list and join once — avoids O(n^2)
        # repeated string concatenation.
        parts = []

        async with httpx.AsyncClient(timeout=60) as client:
            async with client.stream(
                    "POST",
                    self.url,
                    headers=self.headers,
                    json=payload
            ) as r:
                # Fail fast on HTTP errors instead of attempting to parse
                # an error body as SSE frames.
                r.raise_for_status()

                async for line in r.aiter_lines():

                    if not line or not line.startswith("data:"):
                        continue

                    # Bug fix: the filter accepts "data:" with no space,
                    # but the old code only stripped "data: " (with space),
                    # leaving the prefix attached and crashing json.loads
                    # on spec-legal frames. Strip the bare prefix, then
                    # any surrounding whitespace.
                    data = line.removeprefix("data:").strip()

                    if data == "[DONE]":
                        break

                    chunk = json.loads(data)

                    # Robustness: skip frames without choices
                    # (e.g. usage-only or keep-alive payloads).
                    choices = chunk.get("choices") or []
                    if not choices:
                        continue

                    delta = choices[0]["delta"].get("content")

                    if delta:
                        print(delta, end="", flush=True)
                        parts.append(delta)

        return "".join(parts)