Raiff1982 committed on
Commit
b7cc188
·
verified ·
1 Parent(s): 76b05ea

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -7
app.py CHANGED
@@ -23,19 +23,19 @@ from fastapi.staticfiles import StaticFiles
23
  from huggingface_hub import InferenceClient
24
 
25
  # ── Configuration ──────────────────────────────────────────────
26
- MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct"
27
- # On HF Spaces, HF_TOKEN is auto-provided by the environment
28
- HF_TOKEN = os.environ.get("HF_TOKEN", None)
 
29
  MAX_TOKENS = 400
30
  TEMPERATURE = 0.7
31
  TOP_P = 0.9
32
 
33
  # ── Inference Client ──────────────────────────────────────────
34
- # Use HF Inference API without token for on-Space deployment
35
- # Or with token for more stability
36
  try:
37
- client = InferenceClient(model=MODEL_ID, token=HF_TOKEN)
38
- print(f"[INIT] Inference client initialized (token={'provided' if HF_TOKEN else 'not provided'})")
39
  except Exception as e:
40
  print(f"[WARN] Client init failed: {e}")
41
  client = InferenceClient(model=MODEL_ID)
 
23
  from huggingface_hub import InferenceClient
24
 
25
  # ── Configuration ──────────────────────────────────────────────
26
+ # Use your GGUF quantized Codette model (optimized for inference)
27
+ MODEL_ID = "Raiff1982/codette-llama-3.1-8b-gguf"
28
+ # Don't use token - your public models work without it
29
+ HF_TOKEN = None
30
  MAX_TOKENS = 400
31
  TEMPERATURE = 0.7
32
  TOP_P = 0.9
33
 
34
  # ── Inference Client ──────────────────────────────────────────
35
+ # Use HF Inference API with your GGUF Codette model
 
36
  try:
37
+ client = InferenceClient(model=MODEL_ID)
38
+ print(f"[INIT] Inference client initialized with {MODEL_ID}")
39
  except Exception as e:
40
  print(f"[WARN] Client init failed: {e}")
41
  client = InferenceClient(model=MODEL_ID)