CrazyMonkey0 committed
Commit · fe8b413
Parent(s): ad5570a

test(models): downloading models from transformers

Files changed:
- Dockerfile (+5 -22)
- app/main.py (+2 -2)
- app/routes/nlp.py (+29 -27)
Dockerfile CHANGED

@@ -1,38 +1,21 @@
-# Use
+# Use official Python 3.12 image
 FROM python:3.12
 
 # Set working directory
 WORKDIR /app
 
-# Install system dependencies needed for llama-cpp-python and general Python packages
-RUN apt-get update && apt-get install -y \
-    wget \
-    curl \
-    git \
-    build-essential \
-    cmake \
-    && rm -rf /var/lib/apt/lists/*
-
 # Upgrade pip
 RUN pip install --upgrade pip
 
-#
-RUN pip install --no-cache-dir \
-    llama-cpp-python --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu \
-    huggingface-hub
-
-# Copy requirements and install other dependencies
+# Copy requirements and install dependencies
 COPY ./requirements.txt /app/requirements.txt
 RUN pip install --no-cache-dir -r requirements.txt
 
-# Copy
+# Copy application code
 COPY . /app
 
-# Ensure models folder exists (optional, can store HF cache here)
-RUN mkdir -p /app/models
-
 # Expose FastAPI port
 EXPOSE 7860
 
-# Use Gunicorn with Uvicorn workers
-CMD ["gunicorn", "app.main:app", "-k", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:7860", "--workers", "
+# Use Gunicorn with Uvicorn workers
+CMD ["gunicorn", "app.main:app", "-k", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:7860", "--workers", "1"]
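With the llama-cpp build tooling and the explicit GGUF download dropped, the Qwen weights are now fetched by transformers the first time the app starts, so the first request after a cold start pays the download cost. One possible refinement, sketched below and not part of this commit, is a hypothetical prefetch.py run at build time to warm the image's Hugging Face cache:

# prefetch.py - hypothetical build-time helper (not in this commit): download
# the checkpoint into the image's Hugging Face cache so startup doesn't hit the hub.
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"  # same checkpoint as app/routes/nlp.py

if __name__ == "__main__":
    AutoTokenizer.from_pretrained(MODEL_NAME)
    AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype="auto")

It would run as RUN python prefetch.py after the requirements install, trading a larger image for faster cold starts.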
app/main.py CHANGED

@@ -1,5 +1,5 @@
 from fastapi import FastAPI
-from app.routes.nlp import
+from app.routes.nlp import load_model_nlp, router as nlp_router
 from app.routes.tts import load_model_tts
 from app.routes.asr import load_model_asr, router as asr_router
 from app.routes.translation import load_model_translation, router as trans_router
@@ -12,7 +12,7 @@ app = FastAPI(debug=False)
 async def startup_event():
     print("[INFO] Loading all models...")
     try:
-        app.state.
+        app.state.model_nlp, app.state.tokenizer_nlp = load_model_nlp()
         app.state.model_trans, app.state.tokenizer_trans = load_model_translation()
         app.state.model_tts = load_model_tts()
         app.state.processor_asr, app.state.model_asr = load_model_asr()
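The NLP route now follows the same pattern as the other routes: models are loaded once in the startup event and shared through app.state. A minimal sketch of the wiring is below; the @app.on_event decorator and the include_router call are assumptions based on the surrounding code, not shown in this hunk:

from fastapi import FastAPI
from app.routes.nlp import load_model_nlp, router as nlp_router

app = FastAPI(debug=False)
app.include_router(nlp_router)  # assumed; router registration is outside this diff

@app.on_event("startup")
async def startup_event():
    # Load once per worker process and share via app.state.
    app.state.model_nlp, app.state.tokenizer_nlp = load_model_nlp()

With --workers 1 in the Dockerfile CMD, exactly one copy of each model sits in memory; each additional Gunicorn worker would load its own copy.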
app/routes/nlp.py CHANGED

@@ -1,41 +1,43 @@
 from fastapi import APIRouter, Request
+from transformers import AutoModelForCausalLM, AutoTokenizer
 from pydantic import BaseModel
-from llama_cpp import Llama
 from .tts import save_audio
-
+
 
 router = APIRouter()
+model_name = "Qwen/Qwen2.5-1.5B-Instruct"
 
 class ChatRequest(BaseModel):
     message: str
 
-
-
-
-
-    filename="Qwen3-8B-Q4_K_M.gguf",
-    n_ctx=2048,
-    n_threads=8,
-    temperature=0.7,
-    top_p=0.9,
-)
-
-# FastAPI startup event (in main.py)
-# app.state.model_lama = load_model_lama()
+def load_model_nlp():
+    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", device_map="cpu")
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    return model, tokenizer
 
+# Handle chat requests
 @router.post("/chat")
 async def chat(request: Request, message: ChatRequest):
-
-
-
-
-
-
-
-
-
-
+    message = message.message
+    # Get the loaded NLP model and tokenizer
+    model, tokenizer = request.app.state.model_nlp, request.app.state.tokenizer_nlp
+
+    # Prepare the conversation context
+    messages = [
+        {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant for learning English."},
+        {"role": "user", "content": message},
+    ]
+
+    # Tokenize input and generate a response
+    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
+    generated_ids = model.generate(**model_inputs, max_new_tokens=512)
+
+    # Decode the response
+    generated_ids = [output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)]
+    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+
+    # Save response as audio
     # url_path = save_audio(request, response)
 
-
-    return {"response": response, "audio": "TTS disabled"}
+    return {"response": response, "audio": "url_path"} # Return the response and audio URL for tests
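Given the test(models) commit message, a smoke test for the reworked route could look like the sketch below. It is hypothetical and not part of this commit; it assumes the nlp router is mounted at the application root and that startup can reach the hub to download the checkpoint:

# test_chat.py - hypothetical smoke test (not in this commit).
from fastapi.testclient import TestClient

from app.main import app

def test_chat_returns_response_and_audio_fields():
    # Using the client as a context manager runs the startup event,
    # so the Qwen model is loaded into app.state before the request.
    with TestClient(app) as client:
        resp = client.post("/chat", json={"message": "Hello!"})
        assert resp.status_code == 200
        body = resp.json()
        assert isinstance(body["response"], str)
        assert body["audio"] == "url_path"  # TTS is stubbed out in this commit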