Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -23,13 +23,21 @@ def query_llm(prompt, model_name=None):
|
|
| 23 |
response = requests.post("http://localhost:11434/api/generate", json={"model": model_name, "prompt": prompt, "stream": False})
|
| 24 |
return response.json().get("response", "")
|
| 25 |
else:
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
return response["choices"][0]["message"]["content"]
|
| 34 |
|
| 35 |
# === Step 1: Parse intent ===
|
|
|
|
| 23 |
response = requests.post("http://localhost:11434/api/generate", json={"model": model_name, "prompt": prompt, "stream": False})
|
| 24 |
return response.json().get("response", "")
|
| 25 |
else:
|
| 26 |
+
# Replace or load from environment
|
| 27 |
+
client = OpenAI(
|
| 28 |
+
base_url="https://api.studio.nebius.com/v1/",
|
| 29 |
+
api_key=os.environ.get("NEBIUS_API_KEY")
|
| 30 |
+
)
|
| 31 |
+
response = client.chat.completions.create(
|
| 32 |
+
model="meta-llama/Llama-3.3-70B-Instruct",
|
| 33 |
+
max_tokens=512,
|
| 34 |
+
temperature=0.6,
|
| 35 |
+
top_p=0.9,
|
| 36 |
+
extra_body={
|
| 37 |
+
"top_k": 50
|
| 38 |
+
},
|
| 39 |
+
messages=[]
|
| 40 |
+
)
|
| 41 |
return response["choices"][0]["message"]["content"]
|
| 42 |
|
| 43 |
# === Step 1: Parse intent ===
|