natalieparker committed
Commit 45cf4c1 · verified · 1 Parent(s): 39026a3

Update app.py

Files changed (1)
  1. app.py +31 -33
app.py CHANGED
@@ -1,43 +1,41 @@
 from fastapi import FastAPI
-from pydantic import BaseModel
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 
-MODEL_NAME = "natalieparker/LumaAI-160M-v3"
+app = FastAPI()
+
+MODEL = "natalieparker/LumaAI-160M-v3"
 
-print("🔥 Loading tokenizer...")
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+print("🔄 Loading tokenizer...")
+tokenizer = AutoTokenizer.from_pretrained(MODEL)
 
-print("🔥 Loading model...")
+print("🔄 Loading model on CPU...")
 model = AutoModelForCausalLM.from_pretrained(
-    MODEL_NAME,
-    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+    MODEL,
+    torch_dtype=torch.float32,  # CPU only
     low_cpu_mem_usage=True
 )
-
-device = "cuda" if torch.cuda.is_available() else "cpu"
-model.to(device)
-
-app = FastAPI()
-
-class GenerateRequest(BaseModel):
-    prompt: str
-    max_new_tokens: int = 150
-    temperature: float = 0.9
-    top_p: float = 0.9
-
-@app.post("/api/generate")
-def generate(req: GenerateRequest):
-    inputs = tokenizer(req.prompt, return_tensors="pt").to(device)
-
-    output = model.generate(
-        **inputs,
-        max_new_tokens=req.max_new_tokens,
-        temperature=req.temperature,
-        top_p=req.top_p,
-        do_sample=True,
-        repetition_penalty=1.05,
-    )
-
+model.to("cpu")
+
+@app.get("/")
+def root():
+    return {"status": "LumaAI API is live on CPU"}
+
+@app.post("/generate")
+def generate(prompt: str):
+    inputs = tokenizer(prompt, return_tensors="pt")
+    with torch.no_grad():
+        output = model.generate(
+            **inputs,
+            max_new_tokens=150,
+            temperature=0.9,
+            top_p=0.9,
+            repetition_penalty=1.05,
+            do_sample=True
+        )
     text = tokenizer.decode(output[0], skip_special_tokens=True)
-    return {"response": text}
+    return {"response": text}
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=7860)
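For reference, a minimal sketch of how a client might call the updated endpoint. It assumes the app is reachable at http://localhost:7860 (the host and port set in the new uvicorn.run call); since the commit drops the Pydantic GenerateRequest body, FastAPI treats the bare prompt: str argument on the POST route as a query parameter rather than a JSON body. The prompt text below is a placeholder, not from the commit.

import requests

# Hypothetical client for the new POST /generate route.
resp = requests.post(
    "http://localhost:7860/generate",
    params={"prompt": "Hello, LumaAI!"},  # query parameter, not a JSON body
)
resp.raise_for_status()
print(resp.json()["response"])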