Sumkh committed on
Commit
24a23b6
·
verified ·
1 Parent(s): fb72eed

Upload 3 files

Browse files
Files changed (2) hide show
  1. Dockerfile +12 -13
  2. app.py +23 -8
Dockerfile CHANGED
@@ -1,30 +1,32 @@
1
  # Use the official vLLM Docker image as the base image
2
  FROM vllm/vllm-openai:latest
3
 
4
- # Use the root user to ensure write permissions (if needed)
5
  USER root
6
 
7
- # Set the working directory
8
- WORKDIR /app
9
-
10
- # Install system dependencies if needed (e.g., wget)
11
  RUN apt-get update && apt-get install -y wget && rm -rf /var/lib/apt/lists/*
12
 
13
- # Create and set permissions for cache directories (for Hugging Face, matplotlib, etc.)
 
 
 
14
  RUN mkdir -p /app/.cache && chmod -R 777 /app/.cache \
15
- && mkdir -p /app/.config/matplotlib && chmod -R 777 /app/.config/matplotlib
 
16
 
17
- # Set environment variables for cache directories and user agent
18
  ENV HF_HOME=/app/.cache
19
  ENV XDG_CACHE_HOME=/app/.cache
20
  ENV MPLCONFIGDIR=/app/.config/matplotlib
21
  ENV USER_AGENT="my-gradio-app"
 
22
 
23
- # Copy the requirements file and install additional Python dependencies (e.g., gradio)
24
  COPY requirements.txt .
25
  RUN pip install --no-cache-dir -r requirements.txt
26
 
27
- # Copy your application code (including app.py and any other needed files)
28
  COPY . .
29
 
30
  # Expose the port for Gradio (Spaces expects the app on port 7860)
@@ -33,8 +35,5 @@ EXPOSE 7860
33
  # Override the base image's entrypoint so our CMD is executed directly.
34
  ENTRYPOINT []
35
 
36
- # Create a writable log file (alternatively, you could redirect logs to /tmp)
37
- RUN touch /app/vllm.log && chmod 666 /app/vllm.log
38
-
39
  # Set the CMD to launch the vLLM server (for your new model) in the background and then start the Gradio app.
40
  CMD ["bash", "-c", "vllm.entrypoints.openai.api_server --model unsloth/llama-3-8b-Instruct-bnb-4bit --enable-auto-tool-choice --tool-call-parser llama3_json --chat-template examples/tool_chat_template_llama3.1_json.jinja --quantization bitsandbytes --load-format bitsandbytes --dtype half --max-model-len 8192 > /app/vllm.log 2>&1 & python3 app.py"]
 
1
  # Use the official vLLM Docker image as the base image
2
  FROM vllm/vllm-openai:latest
3
 
4
+ # Ensure we run as root (the default) so we can set permissions
5
  USER root
6
 
7
+ # Install system dependencies
 
 
 
8
  RUN apt-get update && apt-get install -y wget && rm -rf /var/lib/apt/lists/*
9
 
10
+ # Set working directory
11
+ WORKDIR /app
12
+
13
+ # Create and set permissions for cache directories
14
  RUN mkdir -p /app/.cache && chmod -R 777 /app/.cache \
15
+ && mkdir -p /app/.config/matplotlib && chmod -R 777 /app/.config/matplotlib \
16
+ && mkdir -p /.EasyOCR && chmod -R 777 /.EasyOCR
17
 
18
+ # Set environment variables for cache directories
19
  ENV HF_HOME=/app/.cache
20
  ENV XDG_CACHE_HOME=/app/.cache
21
  ENV MPLCONFIGDIR=/app/.config/matplotlib
22
  ENV USER_AGENT="my-gradio-app"
23
+ ENV EASYOCR_CACHE_DIR=/app/.EasyOCR
24
 
25
+ # Copy the requirements file and install dependencies
26
  COPY requirements.txt .
27
  RUN pip install --no-cache-dir -r requirements.txt
28
 
29
+ # Copy the rest of the application code
30
  COPY . .
31
 
32
  # Expose the port for Gradio (Spaces expects the app on port 7860)
 
35
  # Override the base image's entrypoint so our CMD is executed directly.
36
  ENTRYPOINT []
37
 
 
 
 
38
  # Set the CMD to launch the vLLM server (for your new model) in the background and then start the Gradio app.
39
  CMD ["bash", "-c", "vllm.entrypoints.openai.api_server --model unsloth/llama-3-8b-Instruct-bnb-4bit --enable-auto-tool-choice --tool-call-parser llama3_json --chat-template examples/tool_chat_template_llama3.1_json.jinja --quantization bitsandbytes --load-format bitsandbytes --dtype half --max-model-len 8192 > /app/vllm.log 2>&1 & python3 app.py"]
app.py CHANGED
@@ -2,7 +2,19 @@ from io import StringIO
2
  import sys
3
 
4
  import os
5
- from huggingface_hub import login
 
 
 
 
 
 
 
 
 
 
 
 
6
  import gradio as gr
7
  import json
8
  import csv
@@ -56,11 +68,11 @@ logger = logging.getLogger(__name__)
56
  logging.disable(logging.WARNING)
57
 
58
 
59
- HF_TOKEN = os.getenv("HF_TOKEN") # Read from environment variable
60
- if HF_TOKEN:
61
- login(token=HF_TOKEN) # Log in to Hugging Face Hub
62
- else:
63
- print("Warning: HF_TOKEN not found in environment variables.")
64
 
65
  # GROQ_API_KEY = os.getenv("GROQ_API_KEY") # Read from environment variable
66
  # if not GROQ_API_KEY:
@@ -551,6 +563,7 @@ model = ChatOpenAI(
551
  base_url="http://localhost:8000/v1", # Use the VLLM instance URL
552
  )
553
 
 
554
  # model = ChatGroq(
555
  # model_name="deepseek-r1-distill-llama-70b",
556
  # temperature=TEMPERATURE,
@@ -991,11 +1004,13 @@ with gr.Blocks(theme="ocean") as AI_Tutor:
991
  type="messages",
992
  chatbot=gr.Chatbot(
993
  label="Chat Window",
994
- height=500
 
995
  ),
996
  textbox=gr.MultimodalTextbox(
 
997
  file_count="multiple",
998
- file_types=None,
999
  sources=["upload"],
1000
  label="Type your query here:",
1001
  placeholder="Enter your question...",
 
2
  import sys
3
 
4
  import os
5
# Point EasyOCR's cache at a writable location.
# NOTE(review): Linux paths are case-sensitive — the original set the env var
# to "/app/.EASYOCR" while the monkey-patch used "/app/.EasyOCR"; both must be
# the same directory (the one the Dockerfile creates and chmods).
os.environ["EASYOCR_CACHE_DIR"] = "/app/.EasyOCR"
import easyocr

# Monkey-patch easyocr.Reader.__init__ so every Reader stores its model files
# under the writable /app/.EasyOCR directory unless the caller explicitly
# passes a model_storage_directory of their own.
_original_init = easyocr.Reader.__init__

def new_init(self, *args, **kwargs):
    """Wrap easyocr.Reader.__init__ to default model_storage_directory.

    If the language list was supplied positionally AND duplicated as the
    ``lang_list`` keyword, drop the keyword to avoid a duplicate-argument
    TypeError, then delegate to the original initializer.
    """
    if args and "lang_list" in kwargs:
        del kwargs["lang_list"]
    kwargs.setdefault("model_storage_directory", "/app/.EasyOCR")
    _original_init(self, *args, **kwargs)

easyocr.Reader.__init__ = new_init
16
+
17
+ #from huggingface_hub import login
18
  import gradio as gr
19
  import json
20
  import csv
 
68
  logging.disable(logging.WARNING)
69
 
70
 
71
+ # HF_TOKEN = os.getenv("HF_TOKEN") # Read from environment variable
72
+ # if HF_TOKEN:
73
+ # login(token=HF_TOKEN) # Log in to Hugging Face Hub
74
+ # else:
75
+ # print("Warning: HF_TOKEN not found in environment variables.")
76
 
77
  # GROQ_API_KEY = os.getenv("GROQ_API_KEY") # Read from environment variable
78
  # if not GROQ_API_KEY:
 
563
  base_url="http://localhost:8000/v1", # Use the VLLM instance URL
564
  )
565
 
566
+
567
  # model = ChatGroq(
568
  # model_name="deepseek-r1-distill-llama-70b",
569
  # temperature=TEMPERATURE,
 
1004
  type="messages",
1005
  chatbot=gr.Chatbot(
1006
  label="Chat Window",
1007
+ height=500,
1008
+ type="messages"
1009
  ),
1010
  textbox=gr.MultimodalTextbox(
1011
+ interactive=True,
1012
  file_count="multiple",
1013
+ file_types=[".pdf",".ppt",".pptx",".doc",".docx",".md","image"],
1014
  sources=["upload"],
1015
  label="Type your query here:",
1016
  placeholder="Enter your question...",