RJuro Claude Opus 4.6 committed on
Commit
b4c4ef5
·
1 Parent(s): 8019d92

Fix: use OpenAI-compatible client for Groq (moonshotai/kimi-k2-instruct)

Browse files

The Groq native SDK uses different model IDs than the OpenAI-compatible
endpoint. Switch to the openai SDK with Groq's base_url, matching the
notebook pattern, and use the full model ID moonshotai/kimi-k2-instruct.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (2) hide show
  1. app.py +8 -8
  2. requirements.txt +1 -1
app.py CHANGED
@@ -12,7 +12,7 @@ from fastapi.responses import FileResponse, StreamingResponse
12
  from fastapi.staticfiles import StaticFiles
13
  from sentence_transformers import SentenceTransformer, CrossEncoder
14
  import chromadb
15
- import groq
16
  from dotenv import load_dotenv
17
 
18
  load_dotenv()
@@ -27,12 +27,12 @@ app = FastAPI(title="SciFact Multilingual Semantic Search")
27
  model: SentenceTransformer = None
28
  reranker: CrossEncoder = None
29
  collection: chromadb.Collection = None
30
- groq_client: groq.Groq = None
31
 
32
 
33
  @app.on_event("startup")
34
  def startup():
35
- global model, reranker, collection, groq_client
36
  print(f"Loading model: {MODEL_NAME}")
37
  model = SentenceTransformer(MODEL_NAME)
38
  print(f"Loading reranker: {RERANKER_MODEL}")
@@ -43,8 +43,8 @@ def startup():
43
  print(f"Collection '{COLLECTION_NAME}': {collection.count()} documents ready.")
44
  api_key = os.environ.get("GROQ_API_KEY")
45
  if api_key:
46
- groq_client = groq.Groq(api_key=api_key)
47
- print("Groq client initialized.")
48
  else:
49
  print("GROQ_API_KEY not set — /synthesize will be unavailable.")
50
 
@@ -130,7 +130,7 @@ def synthesize(
130
  q: str = Query(..., min_length=1),
131
  results: str = Query(..., min_length=1),
132
  ):
133
- if not groq_client:
134
  def error_stream():
135
  yield "data: [ERROR] GROQ_API_KEY not configured.\n\n"
136
  return StreamingResponse(error_stream(), media_type="text/event-stream")
@@ -143,8 +143,8 @@ def synthesize(
143
  user_msg = f"Query: {q}\n\nRetrieved results:\n{context}"
144
 
145
  def token_stream():
146
- stream = groq_client.chat.completions.create(
147
- model="kimi-k2-instruct",
148
  messages=[
149
  {"role": "system", "content": SYNTH_SYSTEM_PROMPT},
150
  {"role": "user", "content": user_msg},
 
12
  from fastapi.staticfiles import StaticFiles
13
  from sentence_transformers import SentenceTransformer, CrossEncoder
14
  import chromadb
15
+ from openai import OpenAI
16
  from dotenv import load_dotenv
17
 
18
  load_dotenv()
 
27
  model: SentenceTransformer = None
28
  reranker: CrossEncoder = None
29
  collection: chromadb.Collection = None
30
+ llm_client: OpenAI = None
31
 
32
 
33
  @app.on_event("startup")
34
  def startup():
35
+ global model, reranker, collection, llm_client
36
  print(f"Loading model: {MODEL_NAME}")
37
  model = SentenceTransformer(MODEL_NAME)
38
  print(f"Loading reranker: {RERANKER_MODEL}")
 
43
  print(f"Collection '{COLLECTION_NAME}': {collection.count()} documents ready.")
44
  api_key = os.environ.get("GROQ_API_KEY")
45
  if api_key:
46
+ llm_client = OpenAI(api_key=api_key, base_url="https://api.groq.com/openai/v1")
47
+ print("Groq LLM client initialized (OpenAI-compatible).")
48
  else:
49
  print("GROQ_API_KEY not set — /synthesize will be unavailable.")
50
 
 
130
  q: str = Query(..., min_length=1),
131
  results: str = Query(..., min_length=1),
132
  ):
133
+ if not llm_client:
134
  def error_stream():
135
  yield "data: [ERROR] GROQ_API_KEY not configured.\n\n"
136
  return StreamingResponse(error_stream(), media_type="text/event-stream")
 
143
  user_msg = f"Query: {q}\n\nRetrieved results:\n{context}"
144
 
145
  def token_stream():
146
+ stream = llm_client.chat.completions.create(
147
+ model="moonshotai/kimi-k2-instruct",
148
  messages=[
149
  {"role": "system", "content": SYNTH_SYSTEM_PROMPT},
150
  {"role": "user", "content": user_msg},
requirements.txt CHANGED
@@ -6,5 +6,5 @@ pandas
6
  pyarrow
7
  torch
8
  datasets
9
- groq
10
  python-dotenv
 
6
  pyarrow
7
  torch
8
  datasets
9
+ openai
10
  python-dotenv