Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,9 +7,16 @@ from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoToken
|
|
| 7 |
import nltk
|
| 8 |
import tempfile
|
| 9 |
import zipfile
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
|
|
|
| 11 |
nltk.download('wordnet')
|
| 12 |
nltk.download('omw-1.4')
|
|
|
|
|
|
|
| 13 |
|
| 14 |
# Load the pre-trained models for image captioning and summarization
|
| 15 |
model_name = "NourFakih/Vit-GPT2-COCO2017Flickr-85k-09"
|
|
@@ -35,15 +42,22 @@ def get_synonyms(word):
|
|
| 35 |
synonyms.add(lemma.name())
|
| 36 |
return synonyms
|
| 37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
def search_captions(query, captions):
|
| 39 |
-
|
| 40 |
-
query_synonyms = set(query_words)
|
| 41 |
-
for word in query_words:
|
| 42 |
-
query_synonyms.update(get_synonyms(word))
|
| 43 |
|
| 44 |
results = []
|
| 45 |
for path, caption in captions.items():
|
| 46 |
-
|
|
|
|
| 47 |
results.append((path, caption))
|
| 48 |
|
| 49 |
return results
|
|
|
|
| 7 |
import nltk
|
| 8 |
import tempfile
|
| 9 |
import zipfile
|
| 10 |
+
from nltk.corpus import wordnet
|
| 11 |
+
import spacy
|
| 12 |
+
import io
|
| 13 |
+
from spacy.cli import download
|
| 14 |
|
| 15 |
+
# Download necessary NLP models
nltk.download('wordnet')
nltk.download('omw-1.4')

# Load the spaCy English pipeline used for tokenising and lemmatising text.
# spacy.load raises OSError when the model package is not installed; only in
# that case fetch it, instead of re-running the download on every startup.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

# Load the pre-trained models for image captioning and summarization
model_name = "NourFakih/Vit-GPT2-COCO2017Flickr-85k-09"
|
|
|
|
| 42 |
synonyms.add(lemma.name())
|
| 43 |
return synonyms
|
| 44 |
|
| 45 |
+
def preprocess_query(query: str) -> set:
    """Expand a piece of text into a set of lower-cased search terms.

    The text is parsed with the module-level spaCy pipeline ``nlp``. For
    every content token, its surface form, its lemma and every term returned
    by ``get_synonyms`` are collected.

    Stop words, punctuation and whitespace tokens are skipped. Callers
    intersect the sets produced for two texts, and without this filter any
    two sentences would "match" on words such as "a" or "the" or on a shared
    full stop. All terms are lower-cased so matching is case-insensitive.

    Args:
        query: The text to analyse (a search query or a caption).

    Returns:
        A set of lower-cased terms; empty when the text contains no content
        words.
    """
    tokens = set()
    for token in nlp(query):
        # Function words and punctuation carry no search signal.
        if token.is_stop or token.is_punct or token.is_space:
            continue
        tokens.add(token.text.lower())
        tokens.add(token.lemma_.lower())
        tokens.update(synonym.lower() for synonym in get_synonyms(token.text))
    return tokens
|
| 53 |
+
|
| 54 |
def search_captions(query, captions):
    """Return the captions that share at least one search term with *query*.

    Both the query and each caption are expanded with ``preprocess_query``;
    a caption is kept when the two term sets have a non-empty intersection.

    Args:
        query: The search text.
        captions: A mapping iterated with ``.items()`` as (path, caption)
            pairs.

    Returns:
        A list of ``(path, caption)`` tuples, in the mapping's iteration
        order, for every matching caption.
    """
    wanted_terms = preprocess_query(query)
    return [
        (image_path, text)
        for image_path, text in captions.items()
        if not wanted_terms.isdisjoint(preprocess_query(text))
    ]
|