Spaces:
Sleeping
Sleeping
FIX: Handle NLTK download reliably with nltk.txt
Browse files- build.sh +2 -1
- build_vocab.py +0 -7
- nltk.txt +1 -0
build.sh
CHANGED
|
@@ -4,6 +4,7 @@ set -e
|
|
| 4 |
# Install all the python packages
|
| 5 |
echo "--- Installing dependencies ---"
|
| 6 |
pip install -r requirements.txt
|
|
|
|
| 7 |
|
| 8 |
# Download ONLY the large model files from the GitHub Release
|
| 9 |
echo "--- Downloading model files ---"
|
|
@@ -11,7 +12,7 @@ wget -O decoder-model.pth "https://github.com/Ishu-Kaur/Image-Caption-AI/release
|
|
| 11 |
wget -O encoder-model.pth "https://github.com/Ishu-Kaur/Image-Caption-AI/releases/download/v1.0.1/encoder-model.pth"
|
| 12 |
echo "--- Model files downloaded successfully ---"
|
| 13 |
|
| 14 |
-
#
|
| 15 |
echo "--- Building vocabulary file ---"
|
| 16 |
python build_vocab.py
|
| 17 |
echo "--- Vocabulary file built successfully ---"
|
|
|
|
| 4 |
# Install all the python packages
|
| 5 |
echo "--- Installing dependencies ---"
|
| 6 |
pip install -r requirements.txt
|
| 7 |
+
python -m nltk.downloader -d /opt/render/project/src/nltk_data $(cat nltk.txt) # <-- ADD THIS LINE (pip cannot install NLTK corpora; use the NLTK downloader)
|
| 8 |
|
| 9 |
# Download ONLY the large model files from the GitHub Release
|
| 10 |
echo "--- Downloading model files ---"
|
|
|
|
| 12 |
wget -O encoder-model.pth "https://github.com/Ishu-Kaur/Image-Caption-AI/releases/download/v1.0.1/encoder-model.pth"
|
| 13 |
echo "--- Model files downloaded successfully ---"
|
| 14 |
|
| 15 |
+
# Build the vocabulary file directly on the server
|
| 16 |
echo "--- Building vocabulary file ---"
|
| 17 |
python build_vocab.py
|
| 18 |
echo "--- Vocabulary file built successfully ---"
|
build_vocab.py
CHANGED
|
@@ -56,13 +56,6 @@ class Vocabulary:
|
|
| 56 |
if __name__ == "__main__":
|
| 57 |
print("Starting vocabulary creation process...")
|
| 58 |
|
| 59 |
-
# Download the NLTK tokenizer model (only needs to be done once)
|
| 60 |
-
try:
|
| 61 |
-
nltk.data.find('tokenizers/punkt')
|
| 62 |
-
except LookupError: # <-- This is a more robust way to check
|
| 63 |
-
print("Downloading NLTK 'punkt' model...")
|
| 64 |
-
nltk.download('punkt')
|
| 65 |
-
|
| 66 |
# Load the Flickr8k training data from Hugging Face
|
| 67 |
print("Loading Flickr8k dataset from Hugging Face...")
|
| 68 |
train_dataset = load_dataset("jxie/flickr8k", split="train")
|
|
|
|
| 56 |
if __name__ == "__main__":
|
| 57 |
print("Starting vocabulary creation process...")
|
| 58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
# Load the Flickr8k training data from Hugging Face
|
| 60 |
print("Loading Flickr8k dataset from Hugging Face...")
|
| 61 |
train_dataset = load_dataset("jxie/flickr8k", split="train")
|
nltk.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
punkt
|