Update to strip echoing in answers
app.py
CHANGED
@@ -15,14 +15,11 @@ os.environ["HF_HOME"] = TMP_CACHE
 os.environ["HF_DATASETS_CACHE"] = TMP_CACHE
 os.environ["HF_METRICS_CACHE"] = TMP_CACHE
 
-app = FastAPI(title="DirectEd LoRA API
+app = FastAPI(title="DirectEd LoRA API")
 
 class PromptRequest(BaseModel):
     prompt: str
-
-@app.get("/health")
-def health():
-    return {"ok": True}
+    max_new_tokens: int = 2048
 
 @app.get("/")
 def root():
@@ -44,7 +41,6 @@ def load_model():
         base_model = AutoModelForCausalLM.from_pretrained(
             BASE_MODEL,
             device_map="auto",
-            low_cpu_mem_usage=True,
             torch_dtype="auto",
         )
 
@@ -58,24 +54,19 @@ def load_model():
         logging.exception("Failed to load model at startup: %s", e)
         pipe = None
 
-
 @app.post("/generate")
 def generate(req: PromptRequest):
     if pipe is None:
         raise HTTPException(status_code=503, detail="Model not loaded. Check logs.")
 
     try:
-
-        max_tokens = 2048
-
-        output = pipe(req.prompt, max_new_tokens=max_tokens, do_sample=True)
+        output = pipe(req.prompt, max_new_tokens=req.max_new_tokens, do_sample=True, temperature=0.7)
         text = output[0].get("generated_text", "").strip()
 
         if text.startswith(req.prompt):
             text = text[len(req.prompt):].strip()
 
         if not text:
-            logging.warning("Model returned empty response for prompt: %s", req.prompt)
            text = "No response generated by the model."
 
        return {"response": text}
@@ -83,3 +74,4 @@ def generate(req: PromptRequest):
     except Exception as e:
         logging.exception("Generation failed for prompt '%s': %s", req.prompt, e)
         raise HTTPException(status_code=500, detail=f"Generation failed: {e}")
+
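For reference, a minimal sketch of how a client might call the updated /generate endpoint after this change. The Space URL, prompt text, token budget, and timeout below are placeholder assumptions; only the route, the PromptRequest fields, and the "response" key come from the diff above.

import requests

# Placeholder Space URL -- substitute the actual deployment address.
API_URL = "https://your-space.hf.space"

payload = {
    "prompt": "Explain LoRA fine-tuning in two sentences.",
    "max_new_tokens": 256,  # optional; PromptRequest defaults to 2048
}

resp = requests.post(f"{API_URL}/generate", json=payload, timeout=300)
resp.raise_for_status()
# The server strips the echoed prompt, so this is just the completion.
print(resp.json()["response"])

A 503 here would mean load_model() failed at startup and pipe is None; a 500 carries the generation error detail raised in the handler.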