bluewhale2025 committed on
Commit
83a76fb
·
1 Parent(s): 544d677

Fix Gradio File component and NLTK data path

Browse files
Files changed (2) hide show
  1. Dockerfile +15 -13
  2. app.py +2 -2
Dockerfile CHANGED
@@ -38,25 +38,27 @@ RUN useradd -m -u 1000 user && \
38
  # Copy requirements first to leverage Docker cache
39
  COPY --chown=user:user requirements.txt .
40
 
41
- # Install Python dependencies and NLTK data as root
42
  USER root
43
 
44
- # Create NLTK data directory with proper permissions
45
- RUN mkdir -p /usr/share/nltk_data/tokenizers \
46
- && chmod -R 777 /usr/share/nltk_data
47
 
48
- # Install NLTK and download data
49
- RUN pip install --no-cache-dir -r requirements.txt \
50
- && python -c "import nltk; nltk.download('punkt', download_dir='/usr/share/nltk_data')" \
51
- && python -c "import nltk; nltk.download('stopwords', download_dir='/usr/share/nltk_data')" \
52
- && python -c "import nltk; nltk.download('wordnet', download_dir='/usr/share/nltk_data')" \
53
- && python -c "import nltk; nltk.download('averaged_perceptron_tagger', download_dir='/usr/share/nltk_data')" \
54
- && chmod -R 755 /usr/share/nltk_data
 
 
55
 
56
  # Set NLTK_DATA environment variable
57
- ENV NLTK_DATA=/usr/share/nltk_data
58
 
59
- # Switch back to non-root user
60
  USER user
61
 
62
  # Copy application files
 
38
  # Copy requirements first to leverage Docker cache
39
  COPY --chown=user:user requirements.txt .
40
 
41
+ # Install Python dependencies
42
  USER root
43
 
44
+ # Create a directory for NLTK data with proper permissions
45
+ RUN mkdir -p /usr/local/share/nltk_data \
46
+ && chmod -R 777 /usr/local/share/nltk_data
47
 
48
+ # Install Python dependencies
49
+ RUN pip install --no-cache-dir -r requirements.txt
50
+
51
+ # Download NLTK data as root
52
+ RUN python -c "import nltk; nltk.download('punkt', download_dir='/usr/local/share/nltk_data')" \
53
+ && python -c "import nltk; nltk.download('stopwords', download_dir='/usr/local/share/nltk_data')" \
54
+ && python -c "import nltk; nltk.download('wordnet', download_dir='/usr/local/share/nltk_data')" \
55
+ && python -c "import nltk; nltk.download('averaged_perceptron_tagger', download_dir='/usr/local/share/nltk_data')" \
56
+ && chmod -R 755 /usr/local/share/nltk_data
57
 
58
  # Set NLTK_DATA environment variable
59
+ ENV NLTK_DATA=/usr/local/share/nltk_data
60
 
61
+ # Switch to non-root user
62
  USER user
63
 
64
  # Copy application files
app.py CHANGED
@@ -43,7 +43,7 @@ BASE_DIR = Path("/home/user/app/data")
43
  UPLOAD_DIR = BASE_DIR / "uploads"
44
  PROCESSED_DIR = BASE_DIR / "processed"
45
  # Use system NLTK data directory that we'll populate in the Dockerfile
46
- NLTK_DATA_DIR = Path("/usr/share/nltk_data")
47
 
48
  # Ensure directories exist with proper permissions
49
  for directory in [BASE_DIR, UPLOAD_DIR, PROCESSED_DIR]:
@@ -212,7 +212,7 @@ with gr.Blocks() as demo:
212
  gr.Markdown("# ParseAI PDF 분석 서비스")
213
 
214
  with gr.Tab("PDF 업로드"):
215
- file_input = gr.File(type="file", file_types=[".pdf"])
216
  upload_button = gr.Button("업로드")
217
  summary_output = gr.Textbox(label="요약")
218
 
 
43
  UPLOAD_DIR = BASE_DIR / "uploads"
44
  PROCESSED_DIR = BASE_DIR / "processed"
45
  # Use system NLTK data directory that we'll populate in the Dockerfile
46
+ NLTK_DATA_DIR = Path("/usr/local/share/nltk_data")
47
 
48
  # Ensure directories exist with proper permissions
49
  for directory in [BASE_DIR, UPLOAD_DIR, PROCESSED_DIR]:
 
212
  gr.Markdown("# ParseAI PDF 분석 서비스")
213
 
214
  with gr.Tab("PDF 업로드"):
215
+ file_input = gr.File(type="filepath", file_types=["pdf"])
216
  upload_button = gr.Button("업로드")
217
  summary_output = gr.Textbox(label="요약")
218