rewgwrth committed on
Commit
f439658
·
1 Parent(s): 5e6c960
Files changed (3) hide show
  1. Dockerfile +19 -0
  2. main.py +61 -0
  3. requirements.txt +8 -0
Dockerfile ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
FROM python:3.9-slim

WORKDIR /app

# Toolchain (git, g++, make) — presumably for packages that ship no
# prebuilt wheel and must compile from source; apt metadata is removed
# in the same layer to keep the image small.
RUN apt-get update && \
    apt-get install -y --no-install-recommends git g++ make && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Install dependencies before copying the app so code-only changes
# do not invalidate the pip layer cache.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY main.py .

# Redirect the Hugging Face model cache to /tmp (writable at runtime)
# and silence the tokenizers fork-parallelism warning.
ENV HF_HOME=/tmp/huggingface-cache
ENV TOKENIZERS_PARALLELISM=false

EXPOSE 7860
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch
import numpy as np

# Guard against NumPy 2.x, which is incompatible with the pinned stack.
# The original used `assert`, which is silently stripped under
# `python -O`; raise explicitly so the check always runs.
if not np.__version__.startswith("1."):
    raise RuntimeError(f"Несовместимая версия NumPy: {np.__version__}")

app = FastAPI()
class RequestData(BaseModel):
    """Payload accepted by the /generate endpoint.

    prompt     -- text to continue.
    max_tokens -- requested generation budget (default 50; the endpoint
                  applies its own cap).
    """

    prompt: str
    max_tokens: int = 50
MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

try:
    # One-time startup load.  device_map="auto" delegates placement to
    # accelerate (CPU in this image); low_cpu_mem_usage streams weights
    # to keep peak RSS down during loading.
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float32,
        device_map="auto",
        low_cpu_mem_usage=True,
    )
    # No explicit `device` here: the pipeline inherits the placement
    # chosen by device_map above.
    generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
except Exception as e:
    # Keep the module importable even when the download/load fails;
    # endpoints report 503 while `generator` stays None.
    print(f"Ошибка загрузки модели: {str(e)}")
    generator = None
@app.post("/generate")
async def generate_text(request: RequestData):
    """Run greedy text generation for the given prompt.

    Returns {"response": <generated text, prompt included>}.
    Raises HTTP 503 if the model failed to load at startup and
    HTTP 500 if generation itself errors.
    """
    if not generator:
        raise HTTPException(status_code=503, detail="Модель не загружена")

    try:
        output = generator(
            request.prompt,
            # Clamp the client-supplied budget to [1, 100]: the original
            # min(..., 100) capped the top but let 0/negative through,
            # which transformers rejects.
            max_new_tokens=max(1, min(request.max_tokens, 100)),
            # Greedy decoding.  The original also passed temperature=0.7,
            # which is ignored when do_sample=False and only triggers a
            # transformers warning, so it is dropped.
            do_sample=False,
            num_beams=1,
        )
        return {"response": output[0]["generated_text"]}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/health")
async def health_check():
    """Liveness probe: reports whether the generation pipeline loaded."""
    status = "ok" if generator else "unavailable"
    return {"status": status}
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
fastapi==0.109.0
uvicorn==0.27.0
# pip does not accept "--index-url" appended to a requirement line
# ("torch==2.2.1 --index-url ..." is rejected as an invalid requirement);
# index options must stand on their own line.  --extra-index-url keeps
# PyPI available for the other packages, and the +cpu local version pins
# the CPU-only wheel from the PyTorch index.
--extra-index-url https://download.pytorch.org/whl/cpu
torch==2.2.1+cpu
transformers==4.40.2
accelerate==0.29.3
sentencepiece==0.2.0
numpy==1.26.4
protobuf==3.20.3