Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,12 +2,19 @@ import gradio as gr
|
|
| 2 |
import pickle
|
| 3 |
import os
|
| 4 |
from datasets import load_dataset
|
| 5 |
-
|
| 6 |
-
dataset = load_dataset("SandipPalit/Movie_Dataset")
|
| 7 |
from InstructorEmbedding import INSTRUCTOR
|
| 8 |
-
model = INSTRUCTOR('hkunlp/instructor-xl')
|
| 9 |
import heapq
|
| 10 |
from sklearn.metrics.pairwise import cosine_similarity
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
def getSimilarity(sentences_a,sentences_b):
|
| 13 |
embeddings_a = pickle.load(open(os.getcwd()+"/temp.pkl",'rb'))
|
|
@@ -15,10 +22,7 @@ def getSimilarity(sentences_a,sentences_b):
|
|
| 15 |
similarities = cosine_similarity(embeddings_a,embeddings_b)
|
| 16 |
return similarities
|
| 17 |
|
| 18 |
-
|
| 19 |
-
from nltk.corpus import stopwords
|
| 20 |
-
from nltk.tokenize import word_tokenize, sent_tokenize
|
| 21 |
-
from nltk.stem import WordNetLemmatizer
|
| 22 |
|
| 23 |
nltk.download('punkt')
|
| 24 |
nltk.download('stopwords')
|
|
@@ -72,7 +76,7 @@ def get_top_k_matches(np_array,k,sentences):
|
|
| 72 |
return indices
|
| 73 |
|
| 74 |
|
| 75 |
-
|
| 76 |
df=pd.DataFrame({"Title":dataset['train']['Title'],"Plot":dataset['train']['Overview']})
|
| 77 |
|
| 78 |
def getOutput(text, size=1000):
|
|
|
|
| 2 |
import pickle
|
| 3 |
import os
|
| 4 |
from datasets import load_dataset
|
| 5 |
+
from gradio.components import Label
|
|
|
|
| 6 |
from InstructorEmbedding import INSTRUCTOR
|
|
|
|
| 7 |
import heapq
|
| 8 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 9 |
+
import nltk
|
| 10 |
+
from nltk.corpus import stopwords
|
| 11 |
+
from nltk.tokenize import word_tokenize, sent_tokenize
|
| 12 |
+
from nltk.stem import WordNetLemmatizer
|
| 13 |
+
import pandas as pd
|
| 14 |
+
|
| 15 |
+
dataset = load_dataset("SandipPalit/Movie_Dataset")
|
| 16 |
+
|
| 17 |
+
model = INSTRUCTOR('hkunlp/instructor-xl')
|
| 18 |
|
| 19 |
def getSimilarity(sentences_a,sentences_b):
|
| 20 |
embeddings_a = pickle.load(open(os.getcwd()+"/temp.pkl",'rb'))
|
|
|
|
| 22 |
similarities = cosine_similarity(embeddings_a,embeddings_b)
|
| 23 |
return similarities
|
| 24 |
|
| 25 |
+
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
nltk.download('punkt')
|
| 28 |
nltk.download('stopwords')
|
|
|
|
| 76 |
return indices
|
| 77 |
|
| 78 |
|
| 79 |
+
|
| 80 |
df=pd.DataFrame({"Title":dataset['train']['Title'],"Plot":dataset['train']['Overview']})
|
| 81 |
|
| 82 |
def getOutput(text, size=1000):
|