Nutnell committed on
Commit
bfe0287
·
verified ·
1 Parent(s): 53098a5

Changed the max output tokens (raised `max_tokens` from 150 to 2048 in the `/generate` endpoint).

Browse files
Files changed (1) hide show
  1. app.py +3 -17
app.py CHANGED
@@ -1,11 +1,9 @@
1
- # app.py
2
  import os
3
  import logging
4
  import tempfile
5
  from fastapi import FastAPI, HTTPException
6
  from pydantic import BaseModel
7
 
8
- # --- Use writable temp dir for Hugging Face caches ---
9
  TMP_CACHE = os.environ.get("HF_CACHE_DIR", os.path.join(tempfile.gettempdir(), "hf_cache"))
10
  try:
11
  os.makedirs(TMP_CACHE, exist_ok=True)
@@ -19,15 +17,9 @@ os.environ["HF_METRICS_CACHE"] = TMP_CACHE
19
 
20
  app = FastAPI(title="DirectEd LoRA API (concise)")
21
 
22
- # ---------------------
23
- # Request Model
24
- # ---------------------
25
  class PromptRequest(BaseModel):
26
  prompt: str
27
 
28
- # ---------------------
29
- # Health & Root
30
- # ---------------------
31
  @app.get("/health")
32
  def health():
33
  return {"ok": True}
@@ -36,9 +28,6 @@ def health():
36
  def root():
37
  return {"status": "AI backend is running"}
38
 
39
- # ---------------------
40
- # Load Model on Startup
41
- # ---------------------
42
  pipe = None
43
 
44
  @app.on_event("startup")
@@ -69,22 +58,19 @@ def load_model():
69
  logging.exception("Failed to load model at startup: %s", e)
70
  pipe = None
71
 
72
- # ---------------------
73
- # Generate Endpoint
74
- # ---------------------
75
  @app.post("/generate")
76
  def generate(req: PromptRequest):
77
  if pipe is None:
78
  raise HTTPException(status_code=503, detail="Model not loaded. Check logs.")
79
 
80
  try:
81
- # Limit tokens to avoid huge outputs
82
- max_tokens = 150
83
 
84
  output = pipe(req.prompt, max_new_tokens=max_tokens, do_sample=True)
85
  text = output[0].get("generated_text", "").strip()
86
 
87
- # Remove repeated context if present
88
  if text.startswith(req.prompt):
89
  text = text[len(req.prompt):].strip()
90
 
 
 
1
  import os
2
  import logging
3
  import tempfile
4
  from fastapi import FastAPI, HTTPException
5
  from pydantic import BaseModel
6
 
 
7
  TMP_CACHE = os.environ.get("HF_CACHE_DIR", os.path.join(tempfile.gettempdir(), "hf_cache"))
8
  try:
9
  os.makedirs(TMP_CACHE, exist_ok=True)
 
17
 
18
  app = FastAPI(title="DirectEd LoRA API (concise)")
19
 
 
 
 
20
class PromptRequest(BaseModel):
    """Request body for the /generate endpoint: a single free-form prompt."""

    prompt: str
22
 
 
 
 
23
@app.get("/health")
def health():
    # Liveness probe: responds with a constant payload once the server is up.
    return dict(ok=True)
 
28
def root():
    # Root status message confirming the backend process is alive.
    status_payload = {"status": "AI backend is running"}
    return status_payload
30
 
 
 
 
31
  pipe = None
32
 
33
  @app.on_event("startup")
 
58
  logging.exception("Failed to load model at startup: %s", e)
59
  pipe = None
60
 
61
+
 
 
62
  @app.post("/generate")
63
  def generate(req: PromptRequest):
64
  if pipe is None:
65
  raise HTTPException(status_code=503, detail="Model not loaded. Check logs.")
66
 
67
  try:
68
+
69
+ max_tokens = 2048
70
 
71
  output = pipe(req.prompt, max_new_tokens=max_tokens, do_sample=True)
72
  text = output[0].get("generated_text", "").strip()
73
 
 
74
  if text.startswith(req.prompt):
75
  text = text[len(req.prompt):].strip()
76