Spaces:
Sleeping
Sleeping
Upload 3 files
Browse files- Dockerfile +12 -13
- app.py +23 -8
Dockerfile
CHANGED
|
@@ -1,30 +1,32 @@
|
|
| 1 |
# Use the official vLLM Docker image as the base image
|
| 2 |
FROM vllm/vllm-openai:latest
|
| 3 |
|
| 4 |
-
#
|
| 5 |
USER root
|
| 6 |
|
| 7 |
-
#
|
| 8 |
-
WORKDIR /app
|
| 9 |
-
|
| 10 |
-
# Install system dependencies if needed (e.g., wget)
|
| 11 |
RUN apt-get update && apt-get install -y wget && rm -rf /var/lib/apt/lists/*
|
| 12 |
|
| 13 |
-
#
|
|
|
|
|
|
|
|
|
|
| 14 |
RUN mkdir -p /app/.cache && chmod -R 777 /app/.cache \
|
| 15 |
-
&& mkdir -p /app/.config/matplotlib && chmod -R 777 /app/.config/matplotlib
|
|
|
|
| 16 |
|
| 17 |
-
# Set environment variables for cache directories
|
| 18 |
ENV HF_HOME=/app/.cache
|
| 19 |
ENV XDG_CACHE_HOME=/app/.cache
|
| 20 |
ENV MPLCONFIGDIR=/app/.config/matplotlib
|
| 21 |
ENV USER_AGENT="my-gradio-app"
|
|
|
|
| 22 |
|
| 23 |
-
# Copy the requirements file and install
|
| 24 |
COPY requirements.txt .
|
| 25 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 26 |
|
| 27 |
-
# Copy
|
| 28 |
COPY . .
|
| 29 |
|
| 30 |
# Expose the port for Gradio (Spaces expects the app on port 7860)
|
|
@@ -33,8 +35,5 @@ EXPOSE 7860
|
|
| 33 |
# Override the base image's entrypoint so our CMD is executed directly.
|
| 34 |
ENTRYPOINT []
|
| 35 |
|
| 36 |
-
# Create a writable log file (alternatively, you could redirect logs to /tmp)
|
| 37 |
-
RUN touch /app/vllm.log && chmod 666 /app/vllm.log
|
| 38 |
-
|
| 39 |
# Set the CMD to launch the vLLM server (for your new model) in the background and then start the Gradio app.
|
| 40 |
CMD ["bash", "-c", "python3 -m vllm.entrypoints.openai.api_server --model unsloth/llama-3-8b-Instruct-bnb-4bit --enable-auto-tool-choice --tool-call-parser llama3_json --chat-template examples/tool_chat_template_llama3.1_json.jinja --quantization bitsandbytes --load-format bitsandbytes --dtype half --max-model-len 8192 > /app/vllm.log 2>&1 & python3 app.py"]
|
|
|
|
| 1 |
# Use the official vLLM Docker image as the base image
|
| 2 |
FROM vllm/vllm-openai:latest
|
| 3 |
|
| 4 |
+
# Ensure we run as root (the default) so we can set permissions
|
| 5 |
USER root
|
| 6 |
|
| 7 |
+
# Install system dependencies
|
|
|
|
|
|
|
|
|
|
| 8 |
RUN apt-get update && apt-get install -y wget && rm -rf /var/lib/apt/lists/*
|
| 9 |
|
| 10 |
+
# Set working directory
|
| 11 |
+
WORKDIR /app
|
| 12 |
+
|
| 13 |
+
# Create and set permissions for cache directories
|
| 14 |
RUN mkdir -p /app/.cache && chmod -R 777 /app/.cache \
|
| 15 |
+
&& mkdir -p /app/.config/matplotlib && chmod -R 777 /app/.config/matplotlib \
|
| 16 |
+
&& mkdir -p /app/.EasyOCR && chmod -R 777 /app/.EasyOCR
|
| 17 |
|
| 18 |
+
# Set environment variables for cache directories
|
| 19 |
ENV HF_HOME=/app/.cache
|
| 20 |
ENV XDG_CACHE_HOME=/app/.cache
|
| 21 |
ENV MPLCONFIGDIR=/app/.config/matplotlib
|
| 22 |
ENV USER_AGENT="my-gradio-app"
|
| 23 |
+
ENV EASYOCR_CACHE_DIR=/app/.EasyOCR
|
| 24 |
|
| 25 |
+
# Copy the requirements file and install dependencies
|
| 26 |
COPY requirements.txt .
|
| 27 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 28 |
|
| 29 |
+
# Copy the rest of the application code
|
| 30 |
COPY . .
|
| 31 |
|
| 32 |
# Expose the port for Gradio (Spaces expects the app on port 7860)
|
|
|
|
| 35 |
# Override the base image's entrypoint so our CMD is executed directly.
|
| 36 |
ENTRYPOINT []
|
| 37 |
|
|
|
|
|
|
|
|
|
|
| 38 |
# Set the CMD to launch the vLLM server (for your new model) in the background and then start the Gradio app.
|
| 39 |
CMD ["bash", "-c", "python3 -m vllm.entrypoints.openai.api_server --model unsloth/llama-3-8b-Instruct-bnb-4bit --enable-auto-tool-choice --tool-call-parser llama3_json --chat-template examples/tool_chat_template_llama3.1_json.jinja --quantization bitsandbytes --load-format bitsandbytes --dtype half --max-model-len 8192 > /app/vllm.log 2>&1 & python3 app.py"]
|
app.py
CHANGED
|
@@ -2,7 +2,19 @@ from io import StringIO
|
|
| 2 |
import sys
|
| 3 |
|
| 4 |
import os
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
import gradio as gr
|
| 7 |
import json
|
| 8 |
import csv
|
|
@@ -56,11 +68,11 @@ logger = logging.getLogger(__name__)
|
|
| 56 |
logging.disable(logging.WARNING)
|
| 57 |
|
| 58 |
|
| 59 |
-
HF_TOKEN = os.getenv("HF_TOKEN") # Read from environment variable
|
| 60 |
-
if HF_TOKEN:
|
| 61 |
-
|
| 62 |
-
else:
|
| 63 |
-
|
| 64 |
|
| 65 |
# GROQ_API_KEY = os.getenv("GROQ_API_KEY") # Read from environment variable
|
| 66 |
# if not GROQ_API_KEY:
|
|
@@ -551,6 +563,7 @@ model = ChatOpenAI(
|
|
| 551 |
base_url="http://localhost:8000/v1", # Use the VLLM instance URL
|
| 552 |
)
|
| 553 |
|
|
|
|
| 554 |
# model = ChatGroq(
|
| 555 |
# model_name="deepseek-r1-distill-llama-70b",
|
| 556 |
# temperature=TEMPERATURE,
|
|
@@ -991,11 +1004,13 @@ with gr.Blocks(theme="ocean") as AI_Tutor:
|
|
| 991 |
type="messages",
|
| 992 |
chatbot=gr.Chatbot(
|
| 993 |
label="Chat Window",
|
| 994 |
-
height=500
|
|
|
|
| 995 |
),
|
| 996 |
textbox=gr.MultimodalTextbox(
|
|
|
|
| 997 |
file_count="multiple",
|
| 998 |
-
file_types=
|
| 999 |
sources=["upload"],
|
| 1000 |
label="Type your query here:",
|
| 1001 |
placeholder="Enter your question...",
|
|
|
|
| 2 |
import sys
|
| 3 |
|
| 4 |
import os
|
| 5 |
+
# Set EasyOCR cache directory to a writable location
|
| 6 |
+
os.environ["EASYOCR_CACHE_DIR"] = "/app/.EasyOCR"
|
| 7 |
+
import easyocr
|
| 8 |
+
# Monkey-patch the easyocr.Reader to force the model_storage directory parameter
|
| 9 |
+
_original_init = easyocr.Reader.__init__
|
| 10 |
+
def new_init(self, *args, **kwargs):
|
| 11 |
+
if args and "lang_list" in kwargs:
|
| 12 |
+
del kwargs["lang_list"]
|
| 13 |
+
kwargs.setdefault("model_storage_directory", "/app/.EasyOCR")
|
| 14 |
+
_original_init(self, *args, **kwargs)
|
| 15 |
+
easyocr.Reader.__init__ = new_init
|
| 16 |
+
|
| 17 |
+
#from huggingface_hub import login
|
| 18 |
import gradio as gr
|
| 19 |
import json
|
| 20 |
import csv
|
|
|
|
| 68 |
logging.disable(logging.WARNING)
|
| 69 |
|
| 70 |
|
| 71 |
+
# HF_TOKEN = os.getenv("HF_TOKEN") # Read from environment variable
|
| 72 |
+
# if HF_TOKEN:
|
| 73 |
+
# login(token=HF_TOKEN) # Log in to Hugging Face Hub
|
| 74 |
+
# else:
|
| 75 |
+
# print("Warning: HF_TOKEN not found in environment variables.")
|
| 76 |
|
| 77 |
# GROQ_API_KEY = os.getenv("GROQ_API_KEY") # Read from environment variable
|
| 78 |
# if not GROQ_API_KEY:
|
|
|
|
| 563 |
base_url="http://localhost:8000/v1", # Use the VLLM instance URL
|
| 564 |
)
|
| 565 |
|
| 566 |
+
|
| 567 |
# model = ChatGroq(
|
| 568 |
# model_name="deepseek-r1-distill-llama-70b",
|
| 569 |
# temperature=TEMPERATURE,
|
|
|
|
| 1004 |
type="messages",
|
| 1005 |
chatbot=gr.Chatbot(
|
| 1006 |
label="Chat Window",
|
| 1007 |
+
height=500,
|
| 1008 |
+
type="messages"
|
| 1009 |
),
|
| 1010 |
textbox=gr.MultimodalTextbox(
|
| 1011 |
+
interactive=True,
|
| 1012 |
file_count="multiple",
|
| 1013 |
+
file_types=[".pdf",".ppt",".pptx",".doc",".docx",".md","image"],
|
| 1014 |
sources=["upload"],
|
| 1015 |
label="Type your query here:",
|
| 1016 |
placeholder="Enter your question...",
|