Spaces:
Sleeping
Sleeping
I'm so confused
Browse files- app.py +1 -3
- requirements.txt +12 -4
- services/api_llm.py +87 -0
- services/prompt.py +8 -0
app.py
CHANGED
|
@@ -13,9 +13,7 @@ MODEL_NAME, REPO_ID = select_best_model()
|
|
| 13 |
model_path = ensure_model()
|
| 14 |
print(f"Using model: {MODEL_NAME} from {REPO_ID}")
|
| 15 |
print(f"Model path: {model_path}")
|
| 16 |
-
print(f"Model size: {torch.cuda.get_device_properties(0).total_memory / (1024**3):.2f} GB")
|
| 17 |
print(f"Model requirements: {MODEL_NAME} requires at least 4GB VRAM and 8GB RAM.")
|
| 18 |
-
print(f"Model type: {'GPU' if torch.cuda.is_available() else 'CPU'}")
|
| 19 |
|
| 20 |
# 2) LLM and embeddings config
|
| 21 |
llm = build_llm(model_path)
|
|
@@ -286,4 +284,4 @@ if __name__ == "__main__":
|
|
| 286 |
server_port=7860,
|
| 287 |
share=True, # Enable sharing via Gradio's temporary URLs
|
| 288 |
show_api=True # Shows the API documentation
|
| 289 |
-
)
|
|
|
|
| 13 |
model_path = ensure_model()
|
| 14 |
print(f"Using model: {MODEL_NAME} from {REPO_ID}")
|
| 15 |
print(f"Model path: {model_path}")
|
|
|
|
| 16 |
print(f"Model requirements: {MODEL_NAME} requires at least 4GB VRAM and 8GB RAM.")
|
|
|
|
| 17 |
|
| 18 |
# 2) LLM and embeddings config
|
| 19 |
llm = build_llm(model_path)
|
|
|
|
| 284 |
server_port=7860,
|
| 285 |
share=True, # Enable sharing via Gradio's temporary URLs
|
| 286 |
show_api=True # Shows the API documentation
|
| 287 |
+
)
|
requirements.txt
CHANGED
|
@@ -7,10 +7,18 @@ numpy>=1.24.0
|
|
| 7 |
|
| 8 |
# LLM and embeddings
|
| 9 |
llama-index>=0.9.0
|
| 10 |
-
llama-index-embeddings-huggingface
|
| 11 |
-
llama-index-llms-llama-cpp
|
| 12 |
sentence-transformers>=2.2.0
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
# Audio processing
|
| 15 |
ffmpeg-python>=0.2.0
|
| 16 |
pydub>=0.25.1
|
|
@@ -18,5 +26,5 @@ librosa>=0.10.1
|
|
| 18 |
soundfile>=0.12.1
|
| 19 |
|
| 20 |
# System utilities
|
| 21 |
-
psutil
|
| 22 |
-
gtts
|
|
|
|
| 7 |
|
| 8 |
# LLM and embeddings
|
| 9 |
llama-index>=0.9.0
|
| 10 |
+
llama-index-embeddings-huggingface>=0.2.0
|
| 11 |
+
llama-index-llms-llama-cpp>=0.1.0
|
| 12 |
sentence-transformers>=2.2.0
|
| 13 |
|
| 14 |
+
# API clients & HTTP
|
| 15 |
+
openai>=0.27.0,<1.0  # services/api_llm.py uses the legacy ChatCompletion API, removed in openai 1.x
|
| 16 |
+
anthropic>=0.60.0
|
| 17 |
+
requests>=2.28.0
|
| 18 |
+
|
| 19 |
+
# Hugging Face utilities
|
| 20 |
+
huggingface-hub>=0.14.1
|
| 21 |
+
|
| 22 |
# Audio processing
|
| 23 |
ffmpeg-python>=0.2.0
|
| 24 |
pydub>=0.25.1
|
|
|
|
| 26 |
soundfile>=0.12.1
|
| 27 |
|
| 28 |
# System utilities
|
| 29 |
+
psutil>=5.9.0
|
| 30 |
+
gtts>=2.3.1
|
services/api_llm.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Factory functions for API-backed LLM clients.
|
| 3 |
+
Detects provider and key, returns an API-based LLM instance.
|
| 4 |
+
"""
|
import os

import anthropic
import requests
from openai import api_key, ChatCompletion  # NOTE(review): `api_key` copies openai.api_key (None) at import time and is never used — safe to drop

from llama_index.core import Settings
| 10 |
+
|
| 11 |
+
class OpenAI:
    """Minimal OpenAI chat-completions wrapper exposing a llama-index-style
    ``complete(prompt)`` interface.

    NOTE(review): relies on the legacy ``openai<1.0`` ``ChatCompletion`` API
    imported at module level — confirm the pinned openai version still ships it.
    """

    def __init__(self, api_key: str, temperature: float = 0.7, model_name: str = "gpt-3.5-turbo"):
        """
        Args:
            api_key: OpenAI API key used for every request.
            temperature: sampling temperature forwarded to the API.
            model_name: chat model identifier.
        """
        # BUG FIX: the original did `api_key = api_key`, a no-op rebinding of the
        # local parameter — the key was never stored on the instance.
        self.api_key = api_key
        self.temperature = temperature
        self.model_name = model_name

    def complete(self, prompt: str):
        """Send *prompt* as a single user message; return an object with ``.text``."""
        resp = ChatCompletion.create(
            model=self.model_name,
            messages=[{"role": "user", "content": prompt}],
            temperature=self.temperature,
            # BUG FIX: supply the stored key per call (supported by openai<1.0);
            # the original never passed its key to the API at all.
            api_key=self.api_key,
        )

        class _Response:
            # lightweight duck-typed stand-in for llama-index's CompletionResponse
            pass

        result = _Response()
        result.text = resp.choices[0].message["content"]
        return result
|
| 27 |
+
|
| 28 |
+
class Anthropic:
    """Minimal Anthropic completions wrapper exposing a llama-index-style
    ``complete(prompt)`` interface."""

    def __init__(self, api_key: str, temperature: float = 0.7, model_name: str = "claude-2"):
        """
        Args:
            api_key: Anthropic API key.
            temperature: sampling temperature forwarded to the API.
            model_name: model identifier (default "claude-2").
        """
        # BUG FIX: the anthropic SDK's client constructor takes keyword-only
        # arguments; `anthropic.Client(api_key)` positional raises TypeError
        # on the SDK versions pinned by this project.
        self.client = anthropic.Anthropic(api_key=api_key)
        self.temperature = temperature
        self.model_name = model_name

    def complete(self, prompt: str):
        """Complete *prompt* via the legacy Text Completions API; return an
        object with ``.text``."""
        # BUG FIX: the legacy completions endpoint requires the
        # "\n\nHuman: ... \n\nAssistant:" envelope; a bare prompt is rejected.
        resp = self.client.completions.create(
            model=self.model_name,
            prompt=f"{anthropic.HUMAN_PROMPT} {prompt}{anthropic.AI_PROMPT}",
            max_tokens_to_sample=256,
            temperature=self.temperature,
        )

        class _Response:
            # lightweight duck-typed stand-in for llama-index's CompletionResponse
            pass

        result = _Response()
        result.text = resp.completion
        return result
|
| 45 |
+
|
| 46 |
+
class MistralAPI:
    """Minimal Mistral "La Plateforme" HTTP client exposing a llama-index-style
    ``complete(prompt)`` interface."""

    def __init__(self, api_key: str, temperature: float = 0.7, model_name: str = "mistral-large"):
        """
        Args:
            api_key: Mistral API key (sent as a Bearer token).
            temperature: sampling temperature forwarded to the API.
            model_name: model identifier sent in the request body.
        """
        self.api_key = api_key
        self.temperature = temperature
        self.model_name = model_name
        # BUG FIX: Mistral's API has no /v1/models/<name>/completions route;
        # the documented endpoint is /v1/chat/completions with the model
        # carried in the JSON body.
        self.endpoint = "https://api.mistral.ai/v1/chat/completions"

    def complete(self, prompt: str):
        """POST *prompt* as a single user message; return an object with ``.text``."""
        headers = {"Authorization": f"Bearer {self.api_key}"}
        payload = {
            "model": self.model_name,
            "messages": [{"role": "user", "content": prompt}],
            "temperature": self.temperature,
            "max_tokens": 256,
        }
        # ROBUSTNESS: the original .json()'d whatever came back with no timeout,
        # so a network stall hung forever and HTTP errors became empty text.
        http_resp = requests.post(self.endpoint, headers=headers, json=payload, timeout=60)
        http_resp.raise_for_status()
        data = http_resp.json()

        class _Response:
            # lightweight duck-typed stand-in for llama-index's CompletionResponse
            pass

        result = _Response()
        result.text = data.get("choices", [{}])[0].get("message", {}).get("content", "")
        return result
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def build_api_llm(provider: str, keys: dict, temperature: float = 0.7):
    """
    Instantiate an API LLM based on provider name and supplied keys.

    Args:
        provider (str): one of "openai", "anthropic", "mistralai"
            (case-insensitive).
        keys (dict): mapping provider -> API key; when absent, the
            provider's conventional environment variable is consulted.
        temperature (float): sampling temperature forwarded to the client.

    Returns:
        LLM instance configured for API calls; also installed globally as
        ``Settings.llm``.

    Raises:
        ValueError: if the provider is unsupported or no API key is found.
    """
    def _require(key, env_var):
        # ROBUSTNESS: fail fast with a clear message instead of constructing
        # a client with key=None and erroring on the first API call.
        if not key:
            raise ValueError(
                f"No API key found for provider {provider!r} "
                f"(pass it in `keys` or set {env_var})"
            )
        return key

    p = provider.lower()
    if p == "openai":
        key = _require(keys.get("openai") or os.getenv("OPENAI_API_KEY"), "OPENAI_API_KEY")
        client = OpenAI(api_key=key, temperature=temperature)
    elif p == "anthropic":
        key = _require(keys.get("anthropic") or os.getenv("ANTHROPIC_API_KEY"), "ANTHROPIC_API_KEY")
        client = Anthropic(api_key=key, temperature=temperature)
    elif p == "mistralai":
        key = _require(keys.get("mistralai") or os.getenv("MISTRAL_API_KEY"), "MISTRAL_API_KEY")
        client = MistralAPI(api_key=key, temperature=temperature)
    else:
        raise ValueError(f"Unsupported provider: {provider}")

    # Register globally so llama-index components pick up this LLM by default.
    Settings.llm = client
    return client
|
services/prompt.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# services/prompt.py
#
# System prompt for the diagnosis-narrowing chat flow. The string below is
# sent verbatim to the model, so edit it with care.
# NOTE(review): the model is told to emit JSON with keys "diagnoses" and
# "confidences" — presumably parsed downstream; verify against the caller.
# NOTE(review): the "…" inside the JSON example is a literal Unicode
# ellipsis shown to the model (placeholder, not valid JSON) — confirm intended.

SYSTEM_PROMPT = """
You are a medical assistant helping a user narrow down to the most likely ICD-10 code.
At each turn, EITHER ask one focused clarifying question (e.g. "Is your cough dry or productive?")
or, if you have enough info, output a final JSON with fields:
{"diagnoses":[…], "confidences":[…]}.
"""
|