AndaiMD committed on
Commit
2b8db5e
·
1 Parent(s): 4febfbc

initial commit

Browse files
Files changed (4) hide show
  1. Dockerfile +11 -0
  2. app/main.py +18 -0
  3. app/model_loader.py +32 -0
  4. requirements.txt +7 -0
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
# Base image providing the Python 3.10 runtime.
FROM python:3.10

# Every following relative path is resolved against /code.
WORKDIR /code

# Install dependencies before copying the sources so this layer can be
# reused by the build cache when only application code changes.
COPY requirements.txt ./requirements.txt
RUN pip install --no-cache-dir --requirement requirements.txt

# Copy the rest of the build context into the working directory.
COPY . ./

# Serve the FastAPI application object `app` from app/main.py on all
# interfaces, port 7860.
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
app/main.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from fastapi import FastAPI, Request
3
+ from fastapi.responses import JSONResponse
4
+ from app.model_loader import load_model
5
+ import torch
6
+
7
app = FastAPI()
# Loaded once at import time so every request shares the same model and
# tokenizer instances.
model, tokenizer = load_model()


@app.post("/predict")
async def predict(request: Request):
    """Generate text for the ``input`` field of a JSON request body.

    The body must be a JSON object. Its optional ``"input"`` key holds the
    prompt string and defaults to the empty string when absent.

    Args:
        request: Incoming HTTP request whose body is parsed as JSON.

    Returns:
        A ``JSONResponse`` of the form ``{"output": <decoded text>}`` on
        success, or ``{"error": <message>}`` with status 400 when the body
        is not valid JSON, is not a JSON object, or ``"input"`` is not a
        string.
    """
    try:
        data = await request.json()
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError both derive from
        # ValueError; report a client error instead of a 500.
        return JSONResponse(
            status_code=400,
            content={"error": "Request body must be valid JSON."},
        )

    if not isinstance(data, dict):
        return JSONResponse(
            status_code=400,
            content={"error": "Request body must be a JSON object."},
        )

    input_text = data.get("input", "")
    if not isinstance(input_text, str):
        return JSONResponse(
            status_code=400,
            content={"error": "'input' must be a string."},
        )

    # The tokenizer returns CPU tensors; move them to the model's device so
    # generation also works when the model was placed on an accelerator.
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    # NOTE: generate() is a synchronous call inside an async handler, so
    # other requests wait while it runs.
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=50)

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return JSONResponse(content={"output": response})
app/model_loader.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer
4
+ from peft import PeftModel
5
+
6
# Keep Hugging Face downloads in /tmp/hf_cache and make sure it exists.
# NOTE(review): transformers is imported above, before this variable is set.
# If the library resolves its default cache location at import time, this
# assignment has no in-process effect and the explicit ``cache_dir``
# arguments below are what actually apply -- confirm before relying on it.
os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
os.makedirs("/tmp/hf_cache", exist_ok=True)


def load_model():
    """Load the Llama-2 chat base model with the BrainGPT adapter applied.

    The Hugging Face access token is read from the ``HF_TOKEN`` environment
    variable and passed to every download call.

    Returns:
        A ``(model, tokenizer)`` tuple: the PEFT-wrapped causal language
        model and the tokenizer of the base model.

    Raises:
        RuntimeError: If ``HF_TOKEN`` is unset or empty.
    """
    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        raise RuntimeError("HF_TOKEN not set.")

    base_model_id = "meta-llama/Llama-2-7b-chat-hf"
    cache_dir = "/tmp/hf_cache"

    # ``token`` is the supported name for the authentication argument;
    # ``use_auth_token`` is its deprecated predecessor.
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_id,
        token=hf_token,
        cache_dir=cache_dir,
        torch_dtype="auto",
        device_map="auto",
    )
    model = PeftModel.from_pretrained(
        base_model,
        "BrainGPT/BrainGPT-7B-v0.1",
        token=hf_token,
        cache_dir=cache_dir,
    )
    tokenizer = AutoTokenizer.from_pretrained(
        base_model_id,
        token=hf_token,
        cache_dir=cache_dir,
    )
    return model, tokenizer
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+
2
+ transformers
3
+ peft
4
+ torch
5
+ accelerate
6
+ fastapi
7
+ uvicorn