Spaces:
Sleeping
Sleeping
Add Streamlit configuration and enhance Dockerfile for improved security and environment setup; pin requirements to specific package versions
Browse files
- .streamlit/config.toml +18 -0
- Dockerfile +35 -7
- requirements.txt +12 -11
- src/streamlit_app.py +19 -4
.streamlit/config.toml
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[server]
|
| 2 |
+
port = 8501
|
| 3 |
+
address = "0.0.0.0"
|
| 4 |
+
headless = true
|
| 5 |
+
enableCORS = false
|
| 6 |
+
|
| 7 |
+
[browser]
|
| 8 |
+
gatherUsageStats = false
|
| 9 |
+
|
| 10 |
+
[runner]
|
| 11 |
+
fastReruns = true
|
| 12 |
+
|
| 13 |
+
[theme]
|
| 14 |
+
primaryColor = "#2196F3"
|
| 15 |
+
backgroundColor = "#FFFFFF"
|
| 16 |
+
secondaryBackgroundColor = "#F0F2F6"
|
| 17 |
+
textColor = "#262730"
|
| 18 |
+
font = "sans serif"
|
Dockerfile
CHANGED
|
@@ -2,6 +2,18 @@ FROM python:3.9-slim
|
|
| 2 |
|
| 3 |
WORKDIR /app
|
| 4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
# Install system dependencies including ffmpeg for audio processing
|
| 6 |
# Add retry logic and better mirrors for improved network reliability
|
| 7 |
RUN apt-get update --allow-releaseinfo-change || (sleep 2 && apt-get update) && \
|
|
@@ -19,7 +31,6 @@ RUN apt-get update --allow-releaseinfo-change || (sleep 2 && apt-get update) &&
|
|
| 19 |
# Set pip to have more retries and timeout
|
| 20 |
ENV PIP_DEFAULT_TIMEOUT=100
|
| 21 |
ENV PIP_RETRIES=3
|
| 22 |
-
ENV PYTHONUNBUFFERED=1
|
| 23 |
|
| 24 |
# Copy requirements and install Python dependencies
|
| 25 |
COPY requirements.txt ./
|
|
@@ -27,14 +38,31 @@ RUN pip install --upgrade pip && \
|
|
| 27 |
pip install --no-cache-dir -r requirements.txt || \
|
| 28 |
(sleep 2 && pip install --no-cache-dir -r requirements.txt)
|
| 29 |
|
| 30 |
-
# Create cookies directory for user uploads
|
| 31 |
-
RUN mkdir -p /app/cookies
|
| 32 |
-
|
| 33 |
# Copy source code
|
| 34 |
COPY src/ ./src/
|
| 35 |
|
| 36 |
-
# Create
|
| 37 |
-
RUN mkdir -p /app/tmp_model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
# Expose the port Streamlit will run on
|
| 40 |
EXPOSE 8501
|
|
@@ -43,4 +71,4 @@ EXPOSE 8501
|
|
| 43 |
HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
|
| 44 |
|
| 45 |
# Run the Streamlit app
|
| 46 |
-
ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py"
|
|
|
|
| 2 |
|
| 3 |
WORKDIR /app
|
| 4 |
|
| 5 |
+
# Set environment variables to avoid permission errors
|
| 6 |
+
ENV HOME=/app \
|
| 7 |
+
PYTHONUNBUFFERED=1 \
|
| 8 |
+
PYTHONDONTWRITEBYTECODE=1 \
|
| 9 |
+
MPLCONFIGDIR=/tmp/matplotlib \
|
| 10 |
+
TRANSFORMERS_CACHE=/app/.cache/huggingface \
|
| 11 |
+
XDG_CACHE_HOME=/app/.cache
|
| 12 |
+
|
| 13 |
+
# Create non-root user for better security
|
| 14 |
+
RUN groupadd -g 1000 appuser && \
|
| 15 |
+
useradd -u 1000 -g appuser -s /bin/sh -m appuser
|
| 16 |
+
|
| 17 |
# Install system dependencies including ffmpeg for audio processing
|
| 18 |
# Add retry logic and better mirrors for improved network reliability
|
| 19 |
RUN apt-get update --allow-releaseinfo-change || (sleep 2 && apt-get update) && \
|
|
|
|
| 31 |
# Set pip to have more retries and timeout
|
| 32 |
ENV PIP_DEFAULT_TIMEOUT=100
|
| 33 |
ENV PIP_RETRIES=3
|
|
|
|
| 34 |
|
| 35 |
# Copy requirements and install Python dependencies
|
| 36 |
COPY requirements.txt ./
|
|
|
|
| 38 |
pip install --no-cache-dir -r requirements.txt || \
|
| 39 |
(sleep 2 && pip install --no-cache-dir -r requirements.txt)
|
| 40 |
|
|
|
|
|
|
|
|
|
|
| 41 |
# Copy source code
|
| 42 |
COPY src/ ./src/
|
| 43 |
|
| 44 |
+
# Create directories with proper permissions
|
| 45 |
+
RUN mkdir -p /app/cookies /app/tmp_model /app/.streamlit /app/.cache && \
|
| 46 |
+
chown -R appuser:appuser /app
|
| 47 |
+
|
| 48 |
+
# Switch to non-root user for better security
|
| 49 |
+
USER appuser
|
| 50 |
+
|
| 51 |
+
# Create a Streamlit configuration file to avoid permission issues
|
| 52 |
+
RUN mkdir -p /app/.streamlit && \
|
| 53 |
+
echo '\
|
| 54 |
+
[server]\n\
|
| 55 |
+
port = 8501\n\
|
| 56 |
+
address = "0.0.0.0"\n\
|
| 57 |
+
headless = true\n\
|
| 58 |
+
enableCORS = false\n\
|
| 59 |
+
\n\
|
| 60 |
+
[browser]\n\
|
| 61 |
+
gatherUsageStats = false\n\
|
| 62 |
+
\n\
|
| 63 |
+
[runner]\n\
|
| 64 |
+
fastReruns = true\n\
|
| 65 |
+
' > /app/.streamlit/config.toml
|
| 66 |
|
| 67 |
# Expose the port Streamlit will run on
|
| 68 |
EXPOSE 8501
|
|
|
|
| 71 |
HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
|
| 72 |
|
| 73 |
# Run the Streamlit app
|
| 74 |
+
ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py"]
|
requirements.txt
CHANGED
|
@@ -1,11 +1,12 @@
|
|
| 1 |
-
streamlit
|
| 2 |
-
yt_dlp
|
| 3 |
-
speechbrain
|
| 4 |
-
torch
|
| 5 |
-
torchaudio
|
| 6 |
-
transformers
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
|
|
|
|
|
| 1 |
+
streamlit==1.31.0
|
| 2 |
+
yt_dlp==2023.11.16
|
| 3 |
+
speechbrain==0.5.15
|
| 4 |
+
torch==2.0.1
|
| 5 |
+
torchaudio==2.0.2
|
| 6 |
+
# Pin transformers to a version that provides AutoProcessor
|
| 7 |
+
transformers==4.31.0
|
| 8 |
+
librosa==0.10.1
|
| 9 |
+
matplotlib==3.7.2
|
| 10 |
+
scikit-learn==1.3.0
|
| 11 |
+
openai==1.0.0
|
| 12 |
+
python-dotenv==1.0.0
|
src/streamlit_app.py
CHANGED
|
@@ -5,8 +5,16 @@ import subprocess
|
|
| 5 |
import librosa
|
| 6 |
import numpy as np
|
| 7 |
import torch
|
|
|
|
| 8 |
from speechbrain.inference.classifiers import EncoderClassifier
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
from dotenv import load_dotenv
|
| 11 |
import matplotlib.pyplot as plt
|
| 12 |
import tempfile
|
|
@@ -113,12 +121,19 @@ class AccentDetector:
|
|
| 113 |
source="speechbrain/lang-id-commonlanguage_ecapa",
|
| 114 |
savedir="tmp_model"
|
| 115 |
)
|
| 116 |
-
|
| 117 |
-
# Initialize the English accent classifier - using VoxLingua107 for now
|
| 118 |
# In production, you'd use a more specialized accent model
|
| 119 |
try:
|
| 120 |
self.model_name = "speechbrain/lang-id-voxlingua107-ecapa"
|
| 121 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
self.model = AutoModelForAudioClassification.from_pretrained(self.model_name)
|
| 123 |
self.have_accent_model = True
|
| 124 |
except Exception as e:
|
|
|
|
| 5 |
import librosa
|
| 6 |
import numpy as np
|
| 7 |
import torch
|
| 8 |
+
import sys
|
| 9 |
from speechbrain.inference.classifiers import EncoderClassifier
|
| 10 |
+
# Handle potential compatibility issues with transformers
|
| 11 |
+
try:
|
| 12 |
+
from transformers import AutoProcessor, AutoModelForAudioClassification
|
| 13 |
+
HAS_AUTO_PROCESSOR = True
|
| 14 |
+
except ImportError:
|
| 15 |
+
from transformers import AutoModelForAudioClassification
|
| 16 |
+
HAS_AUTO_PROCESSOR = False
|
| 17 |
+
st.warning("Using a compatible but limited version of transformers. Some features may be limited.")
|
| 18 |
from dotenv import load_dotenv
|
| 19 |
import matplotlib.pyplot as plt
|
| 20 |
import tempfile
|
|
|
|
| 121 |
source="speechbrain/lang-id-commonlanguage_ecapa",
|
| 122 |
savedir="tmp_model"
|
| 123 |
)
|
| 124 |
+
# Initialize the English accent classifier - using VoxLingua107 for now
|
|
|
|
| 125 |
# In production, you'd use a more specialized accent model
|
| 126 |
try:
|
| 127 |
self.model_name = "speechbrain/lang-id-voxlingua107-ecapa"
|
| 128 |
+
|
| 129 |
+
# Handle case where AutoProcessor is not available
|
| 130 |
+
if HAS_AUTO_PROCESSOR:
|
| 131 |
+
self.processor = AutoProcessor.from_pretrained(self.model_name)
|
| 132 |
+
else:
|
| 133 |
+
# Fall back to using feature_extractor directly if AutoProcessor is not available
|
| 134 |
+
from transformers import AutoFeatureExtractor
|
| 135 |
+
self.processor = AutoFeatureExtractor.from_pretrained(self.model_name)
|
| 136 |
+
|
| 137 |
self.model = AutoModelForAudioClassification.from_pretrained(self.model_name)
|
| 138 |
self.have_accent_model = True
|
| 139 |
except Exception as e:
|