Suguru1846 committed on
Commit
5920a0a
·
verified ·
1 Parent(s): 83f9645

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -0
app.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
3
+ import torch
4
+
5
# FastAPI application object that the route handlers register on.
app = FastAPI()

# Load the tokenizer and the causal LM once, at import time, from a local path.
# NOTE(review): this was previously described as a "LoRA model (4-bit
# Quantized)", but no quantization config is passed here -- the weights are
# requested in float16. Confirm what /app/model actually contains.
model_name = "/app/model"  # Local model path
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)
13
+
14
@app.post("/generate")
async def generate_text(prompt: str, max_tokens: int = 50):
    """Generate text from ``prompt`` with the module-level model.

    Args:
        prompt: Text handed to the tokenizer and then to the model.
        max_tokens: Passed to ``model.generate`` as ``max_new_tokens``.

    Returns:
        A dict ``{"response": text}`` where ``text`` is the first returned
        sequence decoded with special tokens skipped.
    """
    # The model is loaded with device_map="auto", so it is not guaranteed to
    # live on "cuda" (a CPU-only host would crash on a hard-coded
    # .to("cuda")). Move the inputs to the device the model reports instead.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=max_tokens)
    # NOTE(review): for a causal LM the decoded sequence presumably still
    # starts with the prompt text -- confirm whether callers expect that.
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return {"response": response}
20
+
21
@app.get("/")
async def root():
    """Return a fixed status message showing the service is reachable."""
    status = {"message": "Counseling AI Model is Running!"}
    return status