Batrdj committed on
Commit
5a1a2fd
·
verified ·
1 Parent(s): fa34f68

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +18 -0
  2. app.py +45 -0
  3. requirements.txt +7 -0
Dockerfile ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Image for the FastAPI application in app.py, served by uvicorn on port 7860.
FROM python:3.10-slim

WORKDIR /app

# git plus a C/C++ compiler toolchain (presumably for pip packages that need
# to be fetched or built from source -- confirm against requirements.txt).
# --no-install-recommends keeps optional "recommended" packages out of the
# image; the apt lists are deleted in the same layer so they are not shipped.
RUN apt-get update && apt-get install -y --no-install-recommends \
    git \
    gcc \
    g++ \
    && rm -rf /var/lib/apt/lists/*

# requirements.txt is copied before app.py so the pip layer is rebuilt only
# when the dependency list changes, not on every source edit.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY app.py .

# Same port that the uvicorn command below binds to.
EXPOSE 7860

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from pydantic import BaseModel
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
4
+ import torch
5
+
6
# FastAPI application instance; route decorators in this module attach to it.
app = FastAPI()

MODEL_NAME = "Qwen/Qwen2.5-Coder-7B"

# Keyword options passed to both the tokenizer and the model loader.
_LOADER_OPTIONS = {"trust_remote_code": True}

# ---- Quantization config ----
# 4-bit NF4 weights with double quantization, float32 compute dtype.
# NOTE(review): the original comment labelled this "CPU safe"; whether
# bitsandbytes 4-bit loading works on CPU depends on the installed
# bitsandbytes version -- confirm before relying on it.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float32,
    load_in_4bit=True,
)

# Both objects are created at import time, so the module does not finish
# importing until the tokenizer and the model have been loaded.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, **_LOADER_OPTIONS)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="cpu",
    **_LOADER_OPTIONS,
)
29
+
30
class Prompt(BaseModel):
    """Request payload accepted by the ``/chat`` endpoint."""

    # Raw text that the endpoint tokenizes and hands to the model.
    message: str
32
+
33
@app.post("/chat")
def chat(prompt: Prompt):
    """Generate a sampled continuation for ``prompt.message``.

    The message is tokenized, passed to ``model.generate`` with sampling
    enabled (temperature 0.7, at most 200 new tokens), and the first
    returned sequence is decoded with special tokens stripped.

    Args:
        prompt: Request body carrying the text to continue.

    Returns:
        A dict ``{"response": text}``. The full first output sequence is
        decoded; nothing is sliced off, so whatever tokens ``generate``
        returns ahead of the newly sampled ones are part of ``text``.

    Raises:
        HTTPException: With status 400 when the message tokenizes to zero
            tokens, since there is then nothing to condition generation on.
    """
    # Local import keeps this block self-contained with respect to the
    # file's import section; fastapi is already a dependency of the module.
    from fastapi import HTTPException

    inputs = tokenizer(prompt.message, return_tensors="pt")

    # Reject an empty token sequence up front instead of letting it fail
    # inside generate() and surface as an opaque server error.
    if inputs["input_ids"].shape[-1] == 0:
        raise HTTPException(status_code=400, detail="message must not be empty")

    # Keep the input tensors on the same device as the model weights.
    inputs = inputs.to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        temperature=0.7,
        do_sample=True,
    )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return {"response": response}
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ torch
4
+ transformers
5
+ sentencepiece
6
+ accelerate
7
+ bitsandbytes