Spaces:
Running
Running
Commit ·
da92457
1
Parent(s): cf308ec
updated llm
Browse files — core_logic.py (+4 -4)
core_logic.py
CHANGED
|
@@ -10,12 +10,12 @@ from huggingface_hub import InferenceClient
|
|
| 10 |
from tools import web_search, parse_file
|
| 11 |
from groq import Groq
|
| 12 |
|
| 13 |
-
|
| 14 |
|
| 15 |
# Recommended: Qwen2.5-Coder-32B or Llama-3.1-70B-Instruct
|
| 16 |
#client = InferenceClient("deepseek-ai/DeepSeek-V4-Pro", token=os.getenv("HF_TOKEN"))
|
| 17 |
#client = InferenceClient("Qwen/Qwen2.5-Coder-32B-Instruct", token=os.getenv("HF_TOKEN"))
|
| 18 |
-
client = InferenceClient("Qwen/Qwen2.5-Coder-7B-Instruct", token=os.getenv("HF_TOKEN"))
|
| 19 |
#client = InferenceClient("llama-3.1-8b-instant", token=os.getenv("HF_TOKEN")) "llama-3.1-70b-versatile" -> GROQ API
|
| 20 |
#client = InferenceClient("meta-llama/Llama-3.1-8B-Instruct", token=os.getenv("HF_TOKEN")) # Or "Qwen/Qwen2.5-72B-Instruct"
|
| 21 |
|
|
@@ -67,9 +67,9 @@ def chat_function(message, history):
|
|
| 67 |
|
| 68 |
response_text = ""
|
| 69 |
try:
|
| 70 |
-
for chunk in client.chat_completion(messages, max_tokens=2048, stream=True, temperature=0.2):
|
| 71 |
# --- Uncomment below for GROQ
|
| 72 |
-
|
| 73 |
# FIX: Check if choices exists and is not empty
|
| 74 |
if hasattr(chunk, 'choices') and len(chunk.choices) > 0:
|
| 75 |
token = chunk.choices[0].delta.content
|
|
|
|
| 10 |
from tools import web_search, parse_file
|
| 11 |
from groq import Groq
|
| 12 |
|
| 13 |
+
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
|
| 14 |
|
| 15 |
# Recommended: Qwen2.5-Coder-32B or Llama-3.1-70B-Instruct
|
| 16 |
#client = InferenceClient("deepseek-ai/DeepSeek-V4-Pro", token=os.getenv("HF_TOKEN"))
|
| 17 |
#client = InferenceClient("Qwen/Qwen2.5-Coder-32B-Instruct", token=os.getenv("HF_TOKEN"))
|
| 18 |
+
#client = InferenceClient("Qwen/Qwen2.5-Coder-7B-Instruct", token=os.getenv("HF_TOKEN"))
|
| 19 |
#client = InferenceClient("llama-3.1-8b-instant", token=os.getenv("HF_TOKEN")) "llama-3.1-70b-versatile" -> GROQ API
|
| 20 |
#client = InferenceClient("meta-llama/Llama-3.1-8B-Instruct", token=os.getenv("HF_TOKEN")) # Or "Qwen/Qwen2.5-72B-Instruct"
|
| 21 |
|
|
|
|
| 67 |
|
| 68 |
response_text = ""
|
| 69 |
try:
|
| 70 |
+
#for chunk in client.chat_completion(messages, max_tokens=2048, stream=True, temperature=0.2):
|
| 71 |
# --- Uncomment below for GROQ
|
| 72 |
+
for chunk in client.chat.completions.create(model="llama-3.1-70b-versatile", messages=messages, max_tokens=2048, stream=True, temperature=0.2): # Or model="llama-3.1-8b-instant"
|
| 73 |
# FIX: Check if choices exists and is not empty
|
| 74 |
if hasattr(chunk, 'choices') and len(chunk.choices) > 0:
|
| 75 |
token = chunk.choices[0].delta.content
|