# python_project_explainer / modal_client.py
# (uploaded by lafifi-24, revision 933c2fa)
import os
from openai import OpenAI
from concurrent.futures import ThreadPoolExecutor
# Define your models
EXPLANATION_MODEL = "Qwen/Qwen3-4B-Instruct-2507"
EMBEDDING_MODEL = "Qwen/Qwen3-Embedding-8B"
class ModalClient:
@staticmethod
def infer_llm(prompts: list[str], max_tokens: int = 800):
client = OpenAI(
base_url=os.environ.get("MODAL_URL_LLM_INFERENCE"),
api_key=os.environ.get('VLLM_API_KEY', 'not-needed')
)
def process_one(prompt):
response = client.chat.completions.create(
model=EXPLANATION_MODEL,
messages=[{"role": "user", "content": prompt}],
max_tokens=max_tokens
)
return response.choices[0].message.content
with ThreadPoolExecutor(max_workers=32) as executor:
results = list(executor.map(process_one, prompts))
return results
@staticmethod
def embedding_config():
return {
"base_url":os.environ.get("MODAL_URL_LLM_EMBEDDING"),
"api_key":os.environ.get('VLLM_API_KEY', 'not-needed'),
"model":EMBEDDING_MODEL
}