pakito312 committed
Commit e031762 · 1 Parent(s): 850166a
Files changed (3):
  1. Dockerfile +19 -10
  2. api.py +49 -12
  3. requirements.txt +7 -5
Dockerfile CHANGED
@@ -1,18 +1,27 @@
  FROM python:3.10-slim

- # Install curl and zstd for Ollama
- RUN apt-get update && apt-get install -y curl zstd && rm -rf /var/lib/apt/lists/*
-
- # Install Ollama
- RUN curl -fsSL https://ollama.ai/install.sh | sh
-
- # Copy the API
- COPY api.py .
-
- # Install only FastAPI and requests
- RUN pip install fastapi uvicorn requests
+ # Environment variables (HF + perf)
+ ENV PYTHONDONTWRITEBYTECODE=1
+ ENV PYTHONUNBUFFERED=1
+ ENV TRANSFORMERS_CACHE=/data/hf_cache
+ ENV HF_HOME=/data/hf_cache
+
+ WORKDIR /app
+
+ # Minimal system dependencies
+ RUN apt-get update && apt-get install -y \
+     git \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Install the Python dependencies
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Copy the application
+ COPY api.py .

+ # Expose the HF Space port
  EXPOSE 7860

- # Start
- CMD ["sh", "-c", "ollama serve & sleep 40 && ollama pull deepseek-coder:1.3b && uvicorn api:app --host 0.0.0.0 --port 7860"]
+ # Launch FastAPI
+ CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "7860"]
api.py CHANGED
@@ -1,23 +1,60 @@
  from fastapi import FastAPI
- import requests
- import time
+ from pydantic import BaseModel
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM

- app = FastAPI()
- OLLAMA_URL = "http://localhost:11434"
+ app = FastAPI(title="Phi-3 Code API")
+
+ MODEL_ID = "microsoft/phi-3-mini-4k-instruct"
+
+ # Load the tokenizer and the model
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+
+ model = AutoModelForCausalLM.from_pretrained(
+     MODEL_ID,
+     device_map="auto",
+     torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+     low_cpu_mem_usage=True
+ )
+
+ class GenerateRequest(BaseModel):
+     prompt: str
+     max_tokens: int = 512
+     temperature: float = 0.2

  @app.get("/")
  def root():
-     return {"message": "DeepSeek-Coder API"}
+     return {"message": "Phi-3-mini Code API is running"}

  @app.post("/generate")
- def generate(prompt: str):
-     response = requests.post(f"{OLLAMA_URL}/api/generate", json={
-         "model": "deepseek-coder:1.3b",
-         "prompt": prompt,
-         "stream": False
-     })
-     return response.json()
+ def generate(req: GenerateRequest):
+     prompt = req.prompt.strip()
+
+     inputs = tokenizer(
+         prompt,
+         return_tensors="pt",
+         truncation=True,
+         max_length=4096
+     ).to(model.device)
+
+     with torch.no_grad():
+         output = model.generate(
+             **inputs,
+             max_new_tokens=req.max_tokens,
+             temperature=req.temperature,
+             do_sample=req.temperature > 0,  # sample only when a temperature is requested; greedy otherwise
+             pad_token_id=tokenizer.eos_token_id
+         )
+
+     result = tokenizer.decode(
+         output[0],
+         skip_special_tokens=True
+     )

+     return {
+         "model": MODEL_ID,
+         "response": result
+     }
  if __name__ == "__main__":
      import uvicorn
      uvicorn.run(app, host="0.0.0.0", port=7860)
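With this change, /generate takes a JSON body validated by GenerateRequest instead of the old query-parameter prompt. A minimal client sketch, assuming the Space is reachable on localhost:7860 and requests is installed (the prompt text and timeout are illustrative):

    # Hypothetical client call, not part of this commit.
    import requests

    resp = requests.post(
        "http://localhost:7860/generate",
        json={
            "prompt": "Write a Python function that reverses a string.",
            "max_tokens": 256,
            "temperature": 0.2,
        },
        timeout=300,  # CPU generation can take minutes
    )
    resp.raise_for_status()
    print(resp.json()["response"])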
requirements.txt CHANGED
@@ -1,5 +1,7 @@
- fastapi==0.104.1
- uvicorn[standard]==0.24.0
- pydantic==2.5.0
- aiohttp==3.13.3
- requests==2.31.0
+ fastapi==0.110.0
+ uvicorn==0.27.1
+ torch>=2.1.0
+ transformers>=4.39.0
+ accelerate>=0.27.0
+ sentencepiece
+ pydantic
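A quick way to confirm the new pins resolve together after pip install -r requirements.txt (a hypothetical check, not part of the commit):

    # Hypothetical sanity check, not in this commit.
    import accelerate
    import fastapi
    import torch
    import transformers

    print("fastapi     ", fastapi.__version__)
    print("torch       ", torch.__version__)
    print("transformers", transformers.__version__)
    print("accelerate  ", accelerate.__version__)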