Spaces:
Running
Running
Upload app.py
Browse files
app.py
CHANGED
|
@@ -3,10 +3,11 @@ import os
|
|
| 3 |
import re
|
| 4 |
from contextlib import asynccontextmanager
|
| 5 |
|
|
|
|
| 6 |
from fastapi import FastAPI, HTTPException, Request
|
| 7 |
from pydantic import BaseModel
|
| 8 |
|
| 9 |
-
MODEL_NAME = "Qwen/Qwen3.5-9B"
|
| 10 |
API_KEY = os.getenv("API_KEY")
|
| 11 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 12 |
|
|
@@ -25,7 +26,6 @@ async def lifespan(app: FastAPI):
|
|
| 25 |
print("HF_TOKEN found — using HF Inference API")
|
| 26 |
from huggingface_hub import InferenceClient
|
| 27 |
inference_client = InferenceClient(
|
| 28 |
-
provider="together",
|
| 29 |
api_key=HF_TOKEN,
|
| 30 |
)
|
| 31 |
print("Inference client ready.")
|
|
@@ -92,13 +92,13 @@ def run_hf_api(texts: list[str]) -> str:
|
|
| 92 |
{"role": "system", "content": SYSTEM_PROMPT},
|
| 93 |
{"role": "user", "content": build_prompt(texts)},
|
| 94 |
]
|
| 95 |
-
response = inference_client.
|
| 96 |
model=MODEL_NAME,
|
| 97 |
messages=messages,
|
| 98 |
max_tokens=256,
|
| 99 |
temperature=0.1,
|
| 100 |
)
|
| 101 |
-
return response.choices[0].message.content
|
| 102 |
|
| 103 |
|
| 104 |
def run_local(texts: list[str]) -> str:
|
|
|
|
| 3 |
import re
|
| 4 |
from contextlib import asynccontextmanager
|
| 5 |
|
| 6 |
+
|
| 7 |
from fastapi import FastAPI, HTTPException, Request
|
| 8 |
from pydantic import BaseModel
|
| 9 |
|
| 10 |
+
MODEL_NAME = "Qwen/Qwen3.5-9B:together"
|
| 11 |
API_KEY = os.getenv("API_KEY")
|
| 12 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 13 |
|
|
|
|
| 26 |
print("HF_TOKEN found — using HF Inference API")
|
| 27 |
from huggingface_hub import InferenceClient
|
| 28 |
inference_client = InferenceClient(
|
|
|
|
| 29 |
api_key=HF_TOKEN,
|
| 30 |
)
|
| 31 |
print("Inference client ready.")
|
|
|
|
| 92 |
{"role": "system", "content": SYSTEM_PROMPT},
|
| 93 |
{"role": "user", "content": build_prompt(texts)},
|
| 94 |
]
|
| 95 |
+
response = inference_client.chat.completions.create(
|
| 96 |
model=MODEL_NAME,
|
| 97 |
messages=messages,
|
| 98 |
max_tokens=256,
|
| 99 |
temperature=0.1,
|
| 100 |
)
|
| 101 |
+
return response.choices[0].message.content or ""
|
| 102 |
|
| 103 |
|
| 104 |
def run_local(texts: list[str]) -> str:
|