Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -23,19 +23,19 @@ from fastapi.staticfiles import StaticFiles
|
|
| 23 |
from huggingface_hub import InferenceClient
|
| 24 |
|
| 25 |
# ── Configuration ──────────────────────────────────────────────
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
|
|
|
| 29 |
MAX_TOKENS = 400
|
| 30 |
TEMPERATURE = 0.7
|
| 31 |
TOP_P = 0.9
|
| 32 |
|
| 33 |
# ── Inference Client ──────────────────────────────────────────
|
| 34 |
-
# Use HF Inference API
|
| 35 |
-
# Or with token for more stability
|
| 36 |
try:
|
| 37 |
-
client = InferenceClient(model=MODEL_ID
|
| 38 |
-
print(f"[INIT] Inference client initialized
|
| 39 |
except Exception as e:
|
| 40 |
print(f"[WARN] Client init failed: {e}")
|
| 41 |
client = InferenceClient(model=MODEL_ID)
|
|
|
|
| 23 |
from huggingface_hub import InferenceClient
|
| 24 |
|
| 25 |
# ── Configuration ──────────────────────────────────────────────
|
| 26 |
+
# Use your GGUF quantized Codette model (optimized for inference)
|
| 27 |
+
MODEL_ID = "Raiff1982/codette-llama-3.1-8b-gguf"
|
| 28 |
+
# Don't use token - your public models work without it
|
| 29 |
+
HF_TOKEN = None
|
| 30 |
MAX_TOKENS = 400
|
| 31 |
TEMPERATURE = 0.7
|
| 32 |
TOP_P = 0.9
|
| 33 |
|
| 34 |
# ── Inference Client ──────────────────────────────────────────
|
| 35 |
+
# Use HF Inference API with your GGUF Codette model
|
|
|
|
| 36 |
try:
|
| 37 |
+
client = InferenceClient(model=MODEL_ID)
|
| 38 |
+
print(f"[INIT] Inference client initialized with {MODEL_ID}")
|
| 39 |
except Exception as e:
|
| 40 |
print(f"[WARN] Client init failed: {e}")
|
| 41 |
client = InferenceClient(model=MODEL_ID)
|