Spaces:

RJuro
/

scifact-semantic-search

Running

RJuro Claude Opus 4.6 committed on Feb 12

Commit

b4c4ef5

1 Parent(s): 8019d92

Fix: use OpenAI-compatible client for Groq (moonshotai/kimi-k2-instruct)

The Groq native SDK uses different model IDs than Groq's OpenAI-compatible
endpoint. Switch to the `openai` SDK, pointed at Groq's `base_url`, to match
the notebook pattern, using the full model ID moonshotai/kimi-k2-instruct.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (2) hide show

app.py +8 -8
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -12,7 +12,7 @@ from fastapi.responses import FileResponse, StreamingResponse
 from fastapi.staticfiles import StaticFiles
 from sentence_transformers import SentenceTransformer, CrossEncoder
 import chromadb
-import groq
 from dotenv import load_dotenv
 load_dotenv()
@@ -27,12 +27,12 @@ app = FastAPI(title="SciFact Multilingual Semantic Search")
 model: SentenceTransformer = None
 reranker: CrossEncoder = None
 collection: chromadb.Collection = None
-groq_client: groq.Groq = None
 @app.on_event("startup")
 def startup():
-    global model, reranker, collection, groq_client
     print(f"Loading model: {MODEL_NAME}")
     model = SentenceTransformer(MODEL_NAME)
     print(f"Loading reranker: {RERANKER_MODEL}")
@@ -43,8 +43,8 @@ def startup():
     print(f"Collection '{COLLECTION_NAME}': {collection.count()} documents ready.")
     api_key = os.environ.get("GROQ_API_KEY")
     if api_key:
-        groq_client = groq.Groq(api_key=api_key)
-        print("Groq client initialized.")
     else:
         print("GROQ_API_KEY not set — /synthesize will be unavailable.")
@@ -130,7 +130,7 @@ def synthesize(
     q: str = Query(..., min_length=1),
     results: str = Query(..., min_length=1),
 ):
-    if not groq_client:
         def error_stream():
             yield "data: [ERROR] GROQ_API_KEY not configured.\n\n"
         return StreamingResponse(error_stream(), media_type="text/event-stream")
@@ -143,8 +143,8 @@ def synthesize(
     user_msg = f"Query: {q}\n\nRetrieved results:\n{context}"
     def token_stream():
-        stream = groq_client.chat.completions.create(
-            model="kimi-k2-instruct",
             messages=[
                 {"role": "system", "content": SYNTH_SYSTEM_PROMPT},
                 {"role": "user", "content": user_msg},

 from fastapi.staticfiles import StaticFiles
 from sentence_transformers import SentenceTransformer, CrossEncoder
 import chromadb
+from openai import OpenAI
 from dotenv import load_dotenv
 load_dotenv()
 model: SentenceTransformer = None
 reranker: CrossEncoder = None
 collection: chromadb.Collection = None
+llm_client: OpenAI = None
 @app.on_event("startup")
 def startup():
+    global model, reranker, collection, llm_client
     print(f"Loading model: {MODEL_NAME}")
     model = SentenceTransformer(MODEL_NAME)
     print(f"Loading reranker: {RERANKER_MODEL}")
     print(f"Collection '{COLLECTION_NAME}': {collection.count()} documents ready.")
     api_key = os.environ.get("GROQ_API_KEY")
     if api_key:
+        llm_client = OpenAI(api_key=api_key, base_url="https://api.groq.com/openai/v1")
+        print("Groq LLM client initialized (OpenAI-compatible).")
     else:
         print("GROQ_API_KEY not set — /synthesize will be unavailable.")
     q: str = Query(..., min_length=1),
     results: str = Query(..., min_length=1),
 ):
+    if not llm_client:
         def error_stream():
             yield "data: [ERROR] GROQ_API_KEY not configured.\n\n"
         return StreamingResponse(error_stream(), media_type="text/event-stream")
     user_msg = f"Query: {q}\n\nRetrieved results:\n{context}"
     def token_stream():
+        stream = llm_client.chat.completions.create(
+            model="moonshotai/kimi-k2-instruct",
             messages=[
                 {"role": "system", "content": SYNTH_SYSTEM_PROMPT},
                 {"role": "user", "content": user_msg},

requirements.txt CHANGED Viewed

@@ -6,5 +6,5 @@ pandas
 pyarrow
 torch
 datasets
-groq
 python-dotenv

 pyarrow
 torch
 datasets
+openai
 python-dotenv