CrazyMonkey0 committed on
Commit
fe8b413
·
1 Parent(s): ad5570a

test(models): downloading models from transformers

Browse files
Files changed (3) hide show
  1. Dockerfile +5 -22
  2. app/main.py +2 -2
  3. app/routes/nlp.py +29 -27
Dockerfile CHANGED
@@ -1,38 +1,21 @@
1
- # Use full Python 3.12 image for maximum compatibility
2
  FROM python:3.12
3
 
4
  # Set working directory
5
  WORKDIR /app
6
 
7
- # Install system dependencies needed for llama-cpp-python and general Python packages
8
- RUN apt-get update && apt-get install -y \
9
- wget \
10
- curl \
11
- git \
12
- build-essential \
13
- cmake \
14
- && rm -rf /var/lib/apt/lists/*
15
-
16
  # Upgrade pip
17
  RUN pip install --upgrade pip
18
 
19
- # Install prebuilt llama-cpp-python (CPU) and Hugging Face hub for from_pretrained()
20
- RUN pip install --no-cache-dir \
21
- llama-cpp-python --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu \
22
- huggingface-hub
23
-
24
- # Copy requirements and install other dependencies
25
  COPY ./requirements.txt /app/requirements.txt
26
  RUN pip install --no-cache-dir -r requirements.txt
27
 
28
- # Copy the application code
29
  COPY . /app
30
 
31
- # Ensure models folder exists (optional, can store HF cache here)
32
- RUN mkdir -p /app/models
33
-
34
  # Expose FastAPI port
35
  EXPOSE 7860
36
 
37
- # Use Gunicorn with Uvicorn workers for production
38
- CMD ["gunicorn", "app.main:app", "-k", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:7860", "--workers", "2"]
 
1
# Official CPython 3.12 base image
FROM python:3.12

# All subsequent commands run relative to /app
WORKDIR /app

# Make sure pip itself is current before installing anything
RUN pip install --upgrade pip

# Install Python dependencies first so Docker can cache this layer
# independently of application-code changes
COPY ./requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Bring in the application source
COPY . /app

# FastAPI (served by Gunicorn) listens on this port
EXPOSE 7860

# Serve with Gunicorn managing a single Uvicorn worker
CMD ["gunicorn", "app.main:app", "-k", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:7860", "--workers", "1"]
app/main.py CHANGED
@@ -1,5 +1,5 @@
1
  from fastapi import FastAPI
2
- from app.routes.nlp import load_model_lama, router as nlp_router
3
  from app.routes.tts import load_model_tts
4
  from app.routes.asr import load_model_asr, router as asr_router
5
  from app.routes.translation import load_model_translation, router as trans_router
@@ -12,7 +12,7 @@ app = FastAPI(debug=False)
12
  async def startup_event():
13
  print("[INFO] Loading all models...")
14
  try:
15
- app.state.model_lama = load_model_lama()
16
  app.state.model_trans, app.state.tokenizer_trans = load_model_translation()
17
  app.state.model_tts = load_model_tts()
18
  app.state.processor_asr, app.state.model_asr = load_model_asr()
 
1
  from fastapi import FastAPI
2
+ from app.routes.nlp import load_model_nlp, router as nlp_router
3
  from app.routes.tts import load_model_tts
4
  from app.routes.asr import load_model_asr, router as asr_router
5
  from app.routes.translation import load_model_translation, router as trans_router
 
12
  async def startup_event():
13
  print("[INFO] Loading all models...")
14
  try:
15
+ app.state.model_nlp, app.state.tokenizer_nlp = load_model_nlp()
16
  app.state.model_trans, app.state.tokenizer_trans = load_model_translation()
17
  app.state.model_tts = load_model_tts()
18
  app.state.processor_asr, app.state.model_asr = load_model_asr()
app/routes/nlp.py CHANGED
@@ -1,41 +1,43 @@
1
  from fastapi import APIRouter, Request
 
2
  from pydantic import BaseModel
3
- from llama_cpp import Llama
4
  from .tts import save_audio
5
- import os
6
 
7
  router = APIRouter()
 
8
 
9
  class ChatRequest(BaseModel):
10
  message: str
11
 
12
- # Load model function
13
- def load_model_lama():
14
- return Llama.from_pretrained(
15
- repo_id="Qwen/Qwen3-8B-GGUF",
16
- filename="Qwen3-8B-Q4_K_M.gguf",
17
- n_ctx=2048,
18
- n_threads=8,
19
- temperature=0.7,
20
- top_p=0.9,
21
- )
22
-
23
- # FastAPI startup event (w main.py)
24
- # app.state.model_lama = load_model_lama()
25
 
 
26
  @router.post("/chat")
27
  async def chat(request: Request, message: ChatRequest):
28
- prompt = message.message
29
-
30
- # download model from app state
31
- model = request.app.state.model_lama
32
-
33
- # generate response
34
- output = model(prompt, max_tokens=512)
35
- response = output["choices"][0]["text"]
36
-
37
- # # Save audio and get URL path
 
 
 
 
 
 
 
 
 
 
38
  # url_path = save_audio(request, response)
39
 
40
- # return {"response": response, "audio": url_path}
41
- return {"response": response, "audio": "TTS disabled"}
 
1
  from fastapi import APIRouter, Request
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
3
  from pydantic import BaseModel
 
4
  from .tts import save_audio
5
+
6
 
7
router = APIRouter()
# Hugging Face repo id of the chat model; loaded once at startup via load_model_nlp()
model_name = "Qwen/Qwen2.5-1.5B-Instruct"
9
 
10
class ChatRequest(BaseModel):
    """Body schema for POST /chat: a single user message string."""

    message: str
12
 
13
def load_model_nlp():
    """Load the chat model and its tokenizer from the Hugging Face hub.

    Returns:
        tuple: (model, tokenizer) for ``model_name``, with the model pinned
        to CPU and the dtype selected automatically by transformers.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype="auto",
        device_map="cpu",
    )
    return model, tokenizer
 
 
 
 
 
 
 
 
 
17
 
18
# Handle chat requests
@router.post("/chat")
async def chat(request: Request, message: ChatRequest):
    """Generate a chat completion for the user's message.

    Reads the model/tokenizer stored on ``app.state`` at startup, builds a
    chat-template prompt with a fixed system message, and returns the
    decoded model output. Audio generation is currently disabled.
    """
    import torch  # local import: only needed to disable autograd during generation

    # Fix: don't shadow the ChatRequest parameter with its own field
    user_message = message.message

    # Get the loaded NLP model and tokenizer (set in main.py's startup event)
    model = request.app.state.model_nlp
    tokenizer = request.app.state.tokenizer_nlp

    # Prepare the conversation context
    messages = [
        {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant for learning English."},
        {"role": "user", "content": user_message},
    ]

    # Tokenize input and generate a response. inference_mode avoids building
    # autograd state — lower memory and faster on CPU-only inference.
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    with torch.inference_mode():
        generated_ids = model.generate(**model_inputs, max_new_tokens=512)

    # Decode only the newly generated tokens (strip the echoed prompt)
    generated_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

    # Save response as audio
    # url_path = save_audio(request, response)

    return {"response": response, "audio": "url_path"}  # Return the response and audio URL for tests