Spaces:
Sleeping
Sleeping
Commit
·
433a189
1
Parent(s):
f98f8ce
Fix build issues and optimize Dockerfile
Browse files
- Dockerfile +5 -10
- README.md +4 -3
- app.py +8 -18
- packages.txt +1 -1
- requirements.txt +5 -5
Dockerfile
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
-
FROM python:3.
|
| 2 |
|
| 3 |
WORKDIR /code
|
| 4 |
|
| 5 |
-
#
|
| 6 |
COPY packages.txt /root/packages.txt
|
| 7 |
RUN apt-get update && \
|
| 8 |
xargs -r -a /root/packages.txt apt-get install -y && \
|
|
@@ -12,20 +12,15 @@ RUN apt-get update && \
|
|
| 12 |
COPY requirements.txt .
|
| 13 |
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
| 14 |
|
| 15 |
-
#
|
| 16 |
-
RUN pip install --no-cache-dir llama-cpp-python
|
| 17 |
-
|
| 18 |
-
# Set Hugging Face cache directory to a writable location
|
| 19 |
ENV HF_HOME=/code/.cache/huggingface
|
| 20 |
RUN mkdir -p /code/.cache/huggingface && \
|
| 21 |
-
|
|
|
|
| 22 |
|
| 23 |
# Copy application code
|
| 24 |
COPY . .
|
| 25 |
|
| 26 |
-
# Ensure correct permissions for the working directory
|
| 27 |
-
RUN chmod -R 777 /code
|
| 28 |
-
|
| 29 |
# Expose port
|
| 30 |
EXPOSE 7860
|
| 31 |
|
|
|
|
| 1 |
+
FROM python:3.12
|
| 2 |
|
| 3 |
WORKDIR /code
|
| 4 |
|
| 5 |
+
# Install system dependencies
|
| 6 |
COPY packages.txt /root/packages.txt
|
| 7 |
RUN apt-get update && \
|
| 8 |
xargs -r -a /root/packages.txt apt-get install -y && \
|
|
|
|
| 12 |
COPY requirements.txt .
|
| 13 |
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
| 14 |
|
| 15 |
+
# Pre-download the model
|
|
|
|
|
|
|
|
|
|
| 16 |
ENV HF_HOME=/code/.cache/huggingface
|
| 17 |
RUN mkdir -p /code/.cache/huggingface && \
|
| 18 |
+
pip install huggingface_hub && \
|
| 19 |
+
python -c "from huggingface_hub import hf_hub_download; hf_hub_download(repo_id='muhammadnoman76/cortex_q4', filename='unsloth.Q4_K_M.gguf', local_dir='/code', local_dir_use_symlinks=False)"
|
| 20 |
|
| 21 |
# Copy application code
|
| 22 |
COPY . .
|
| 23 |
|
|
|
|
|
|
|
|
|
|
| 24 |
# Expose port
|
| 25 |
EXPOSE 7860
|
| 26 |
|
README.md
CHANGED
|
@@ -10,10 +10,11 @@ license: afl-3.0
|
|
| 10 |
|
| 11 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
| 12 |
|
| 13 |
-
|
| 14 |
# LLM Streaming API
|
| 15 |
|
| 16 |
This Space provides a FastAPI application that streams responses from the Cortex LLM model.
|
| 17 |
|
| 18 |
-
-
|
| 19 |
-
-
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
| 12 |
|
|
|
|
| 13 |
# LLM Streaming API
|
| 14 |
|
| 15 |
This Space provides a FastAPI application that streams responses from the Cortex LLM model.
|
| 16 |
|
| 17 |
+
- Send GET requests to `/stream?task=<your_task>` to receive a streamed response from the model.
|
| 18 |
+
- Example: `/stream?task=make an agent which send mail by searching top 5 website from google`
|
| 19 |
+
|
| 20 |
+
**Note**: The `/ui` endpoint is not implemented in the current version.
|
app.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
from fastapi import FastAPI
|
| 2 |
from fastapi.responses import StreamingResponse
|
| 3 |
-
from huggingface_hub import hf_hub_download
|
| 4 |
from llama_cpp import Llama
|
| 5 |
import asyncio
|
| 6 |
from fastapi.middleware.cors import CORSMiddleware
|
|
@@ -15,14 +14,13 @@ app.add_middleware(
|
|
| 15 |
allow_headers=["*"],
|
| 16 |
)
|
| 17 |
|
| 18 |
-
#
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
model_path
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
local_dir_use_symlinks=False
|
| 26 |
)
|
| 27 |
|
| 28 |
alpaca_prompt = """
|
|
@@ -51,14 +49,6 @@ Important notes:
|
|
| 51 |
### Response:
|
| 52 |
"""
|
| 53 |
|
| 54 |
-
# Load model from local file in the copied folder
|
| 55 |
-
llm = Llama(
|
| 56 |
-
model_path= r'.//unsloth.Q4_K_M.gguf',
|
| 57 |
-
n_ctx=2048,
|
| 58 |
-
n_batch=512,
|
| 59 |
-
verbose=False
|
| 60 |
-
)
|
| 61 |
-
|
| 62 |
async def stream_llm_response(task_description: str):
|
| 63 |
prompt = alpaca_prompt.format(task_description)
|
| 64 |
stream = llm(
|
|
@@ -77,4 +67,4 @@ async def stream_response(task: str = "make an agent which send mail by searchin
|
|
| 77 |
|
| 78 |
if __name__ == "__main__":
|
| 79 |
import uvicorn
|
| 80 |
-
uvicorn.run(app, host="0.0.0.0", port=
|
|
|
|
| 1 |
from fastapi import FastAPI
|
| 2 |
from fastapi.responses import StreamingResponse
|
|
|
|
| 3 |
from llama_cpp import Llama
|
| 4 |
import asyncio
|
| 5 |
from fastapi.middleware.cors import CORSMiddleware
|
|
|
|
| 14 |
allow_headers=["*"],
|
| 15 |
)
|
| 16 |
|
| 17 |
+
# Load model from local file
|
| 18 |
+
model_path = "./unsloth.Q4_K_M.gguf"
|
| 19 |
+
llm = Llama(
|
| 20 |
+
model_path=model_path,
|
| 21 |
+
n_ctx=2048,
|
| 22 |
+
n_batch=512,
|
| 23 |
+
verbose=False
|
|
|
|
| 24 |
)
|
| 25 |
|
| 26 |
alpaca_prompt = """
|
|
|
|
| 49 |
### Response:
|
| 50 |
"""
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
async def stream_llm_response(task_description: str):
|
| 53 |
prompt = alpaca_prompt.format(task_description)
|
| 54 |
stream = llm(
|
|
|
|
| 67 |
|
| 68 |
if __name__ == "__main__":
|
| 69 |
import uvicorn
|
| 70 |
+
uvicorn.run(app, host="0.0.0.0", port=7860)
|
packages.txt
CHANGED
|
@@ -2,4 +2,4 @@ build-essential
|
|
| 2 |
cmake
|
| 3 |
git
|
| 4 |
libopenblas-dev
|
| 5 |
-
libomp-dev
|
|
|
|
| 2 |
cmake
|
| 3 |
git
|
| 4 |
libopenblas-dev
|
| 5 |
+
libomp-dev
|
requirements.txt
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
-
fastapi
|
| 2 |
-
uvicorn
|
| 3 |
-
pydantic
|
| 4 |
-
llama-cpp-python
|
| 5 |
-
huggingface_hub
|
|
|
|
| 1 |
+
fastapi==0.115.12
|
| 2 |
+
uvicorn==0.34.2
|
| 3 |
+
pydantic==2.11.4
|
| 4 |
+
llama-cpp-python==0.3.8
|
| 5 |
+
huggingface_hub==0.30.2
|