Spaces:
Running
Running
Fix: use OpenAI-compatible client for Groq (moonshotai/kimi-k2-instruct)
Browse files
The Groq native SDK uses different model IDs than the OpenAI-compatible
endpoint. Switch to the openai SDK with Groq's base_url to match the
notebook pattern, using the full model ID moonshotai/kimi-k2-instruct.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
- app.py +8 -8
- requirements.txt +1 -1
app.py
CHANGED
|
@@ -12,7 +12,7 @@ from fastapi.responses import FileResponse, StreamingResponse
|
|
| 12 |
from fastapi.staticfiles import StaticFiles
|
| 13 |
from sentence_transformers import SentenceTransformer, CrossEncoder
|
| 14 |
import chromadb
|
| 15 |
-
import
|
| 16 |
from dotenv import load_dotenv
|
| 17 |
|
| 18 |
load_dotenv()
|
|
@@ -27,12 +27,12 @@ app = FastAPI(title="SciFact Multilingual Semantic Search")
|
|
| 27 |
model: SentenceTransformer = None
|
| 28 |
reranker: CrossEncoder = None
|
| 29 |
collection: chromadb.Collection = None
|
| 30 |
-
|
| 31 |
|
| 32 |
|
| 33 |
@app.on_event("startup")
|
| 34 |
def startup():
|
| 35 |
-
global model, reranker, collection,
|
| 36 |
print(f"Loading model: {MODEL_NAME}")
|
| 37 |
model = SentenceTransformer(MODEL_NAME)
|
| 38 |
print(f"Loading reranker: {RERANKER_MODEL}")
|
|
@@ -43,8 +43,8 @@ def startup():
|
|
| 43 |
print(f"Collection '{COLLECTION_NAME}': {collection.count()} documents ready.")
|
| 44 |
api_key = os.environ.get("GROQ_API_KEY")
|
| 45 |
if api_key:
|
| 46 |
-
|
| 47 |
-
print("Groq client initialized.")
|
| 48 |
else:
|
| 49 |
print("GROQ_API_KEY not set — /synthesize will be unavailable.")
|
| 50 |
|
|
@@ -130,7 +130,7 @@ def synthesize(
|
|
| 130 |
q: str = Query(..., min_length=1),
|
| 131 |
results: str = Query(..., min_length=1),
|
| 132 |
):
|
| 133 |
-
if not
|
| 134 |
def error_stream():
|
| 135 |
yield "data: [ERROR] GROQ_API_KEY not configured.\n\n"
|
| 136 |
return StreamingResponse(error_stream(), media_type="text/event-stream")
|
|
@@ -143,8 +143,8 @@ def synthesize(
|
|
| 143 |
user_msg = f"Query: {q}\n\nRetrieved results:\n{context}"
|
| 144 |
|
| 145 |
def token_stream():
|
| 146 |
-
stream =
|
| 147 |
-
model="kimi-k2-instruct",
|
| 148 |
messages=[
|
| 149 |
{"role": "system", "content": SYNTH_SYSTEM_PROMPT},
|
| 150 |
{"role": "user", "content": user_msg},
|
|
|
|
| 12 |
from fastapi.staticfiles import StaticFiles
|
| 13 |
from sentence_transformers import SentenceTransformer, CrossEncoder
|
| 14 |
import chromadb
|
| 15 |
+
from openai import OpenAI
|
| 16 |
from dotenv import load_dotenv
|
| 17 |
|
| 18 |
load_dotenv()
|
|
|
|
| 27 |
model: SentenceTransformer = None
|
| 28 |
reranker: CrossEncoder = None
|
| 29 |
collection: chromadb.Collection = None
|
| 30 |
+
llm_client: OpenAI = None
|
| 31 |
|
| 32 |
|
| 33 |
@app.on_event("startup")
|
| 34 |
def startup():
|
| 35 |
+
global model, reranker, collection, llm_client
|
| 36 |
print(f"Loading model: {MODEL_NAME}")
|
| 37 |
model = SentenceTransformer(MODEL_NAME)
|
| 38 |
print(f"Loading reranker: {RERANKER_MODEL}")
|
|
|
|
| 43 |
print(f"Collection '{COLLECTION_NAME}': {collection.count()} documents ready.")
|
| 44 |
api_key = os.environ.get("GROQ_API_KEY")
|
| 45 |
if api_key:
|
| 46 |
+
llm_client = OpenAI(api_key=api_key, base_url="https://api.groq.com/openai/v1")
|
| 47 |
+
print("Groq LLM client initialized (OpenAI-compatible).")
|
| 48 |
else:
|
| 49 |
print("GROQ_API_KEY not set — /synthesize will be unavailable.")
|
| 50 |
|
|
|
|
| 130 |
q: str = Query(..., min_length=1),
|
| 131 |
results: str = Query(..., min_length=1),
|
| 132 |
):
|
| 133 |
+
if not llm_client:
|
| 134 |
def error_stream():
|
| 135 |
yield "data: [ERROR] GROQ_API_KEY not configured.\n\n"
|
| 136 |
return StreamingResponse(error_stream(), media_type="text/event-stream")
|
|
|
|
| 143 |
user_msg = f"Query: {q}\n\nRetrieved results:\n{context}"
|
| 144 |
|
| 145 |
def token_stream():
|
| 146 |
+
stream = llm_client.chat.completions.create(
|
| 147 |
+
model="moonshotai/kimi-k2-instruct",
|
| 148 |
messages=[
|
| 149 |
{"role": "system", "content": SYNTH_SYSTEM_PROMPT},
|
| 150 |
{"role": "user", "content": user_msg},
|
requirements.txt
CHANGED
|
@@ -6,5 +6,5 @@ pandas
|
|
| 6 |
pyarrow
|
| 7 |
torch
|
| 8 |
datasets
|
| 9 |
-
|
| 10 |
python-dotenv
|
|
|
|
| 6 |
pyarrow
|
| 7 |
torch
|
| 8 |
datasets
|
| 9 |
+
openai
|
| 10 |
python-dotenv
|