rayymaxx committed
Commit 6c0bb59 · Parent: 03a5b76

Made application

Files changed (3):
  1. Dockerfile +11 -0
  2. app.py +76 -0
  3. requirements.txt +7 -0
Dockerfile ADDED
@@ -0,0 +1,11 @@
+ FROM python:3.10
+
+ WORKDIR /code
+
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ COPY . .
+
+ # Run FastAPI app with uvicorn
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
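After building and running this image, a quick way to confirm the container came up is to poll the /health endpoint defined in app.py below. This is a minimal smoke-test sketch, not part of the commit: it assumes the container's port 7860 is published on localhost (e.g. docker run -p 7860:7860), and the file name smoke_test.py is illustrative. It uses only the Python standard library.

# smoke_test.py - poll the container's health endpoint until it responds
# (assumes the image was started with port 7860 published on localhost)
import json
import time
import urllib.request

HEALTH_URL = "http://localhost:7860/health"  # hypothetical local mapping

def wait_for_health(url: str, timeout: float = 60.0) -> dict:
    """Poll `url` until it returns HTTP 200 or `timeout` seconds elapse."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with urllib.request.urlopen(url, timeout=5) as resp:
                if resp.status == 200:
                    return json.loads(resp.read())
        except OSError:
            pass  # container may still be starting; retry
        time.sleep(2)
    raise TimeoutError(f"{url} did not become healthy within {timeout}s")

if __name__ == "__main__":
    print(wait_for_health(HEALTH_URL))  # expected: {'ok': True}

Note that model loading happens at app startup and can take several minutes for an 8B base model, so a generous timeout is deliberate.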
app.py ADDED
@@ -0,0 +1,76 @@
+ # app.py (safe, use /tmp for cache)
+ import os
+ import logging
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ import tempfile
+
+ # --- Put caches in a writable temp dir to avoid permission errors ---
+ TMP_CACHE = os.environ.get("HF_CACHE_DIR", os.path.join(tempfile.gettempdir(), "hf_cache"))
+ try:
+     os.makedirs(TMP_CACHE, exist_ok=True)
+ except Exception:
+     # if even this fails, fall back to tempfile.gettempdir()
+     TMP_CACHE = tempfile.gettempdir()
+
+ # export environment vars before importing transformers
+ os.environ["TRANSFORMERS_CACHE"] = TMP_CACHE
+ os.environ["HF_HOME"] = TMP_CACHE
+ os.environ["HF_DATASETS_CACHE"] = TMP_CACHE
+ os.environ["HF_METRICS_CACHE"] = TMP_CACHE
+
+ app = FastAPI(title="DirectEd LoRA API (safe startup)")
+
+ @app.get("/health")
+ def health():
+     return {"ok": True}
+
+ @app.get("/")
+ def root():
+     return {"Status": "AI backend is running"}
+
+ class Request(BaseModel):
+     prompt: str
+     max_new_tokens: int = 150
+     temperature: float = 0.7
+
+ pipe = None
+
+ @app.on_event("startup")
+ def load_model():
+     global pipe
+     try:
+         # heavy imports done during startup
+         from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+         from peft import PeftModel
+
+         BASE_MODEL = "unsloth/llama-3-8b-Instruct-bnb-4bit"
+         ADAPTER_REPO = "rayymaxx/DirectEd-AI-LoRA"  # <-- replace with your adapter repo
+
+         tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
+         base_model = AutoModelForCausalLM.from_pretrained(
+             BASE_MODEL,
+             device_map="auto",
+             low_cpu_mem_usage=True,
+             torch_dtype="auto",
+         )
+
+         model = PeftModel.from_pretrained(base_model, ADAPTER_REPO)
+         model.eval()
+
+         pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device_map="auto")
+         logging.info("Model and adapter loaded successfully.")
+     except Exception as e:
+         logging.exception("Failed to load model at startup: %s", e)
+         pipe = None
+
+ @app.post("/generate")
+ def generate(req: Request):
+     if pipe is None:
+         raise HTTPException(status_code=503, detail="Model not loaded. Check logs.")
+     try:
+         out = pipe(req.prompt, max_new_tokens=req.max_new_tokens, temperature=req.temperature, do_sample=True)
+         return {"response": out[0]["generated_text"]}
+     except Exception as e:
+         logging.exception("Generation failed: %s", e)
+         raise HTTPException(status_code=500, detail=str(e))
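Once the startup hook has loaded the model, the /generate endpoint can be exercised with a small client. This is a sketch using the requests library (not listed in requirements.txt, as it is only needed client-side); the base URL, file name client.py, and payload values are illustrative.

# client.py - example request against the /generate endpoint
import requests  # third-party package, installed separately

BASE_URL = "http://localhost:7860"  # hypothetical local deployment

payload = {
    "prompt": "Explain LoRA fine-tuning in one paragraph.",
    "max_new_tokens": 150,
    "temperature": 0.7,
}

resp = requests.post(f"{BASE_URL}/generate", json=payload, timeout=120)
resp.raise_for_status()  # a 503 here means the model has not finished loading
print(resp.json()["response"])

A 503 from this endpoint is expected behavior while load_model is still running or has failed; the container logs hold the underlying error in the latter case.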
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ torch
+ transformers
+ peft
+ accelerate
+ bitsandbytes
+ fastapi
+ uvicorn