Rakesh30 committed on
Commit
23228a6
·
1 Parent(s): 4b7d789

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -8
app.py CHANGED
@@ -2,12 +2,19 @@ import gradio as gr
2
  import pickle
3
  import os
4
  from datasets import load_dataset
5
- embeddings_a = pickle.load(open(os.getcwd()+"/temp.pkl",'rb'))
6
- dataset = load_dataset("SandipPalit/Movie_Dataset")
7
  from InstructorEmbedding import INSTRUCTOR
8
- model = INSTRUCTOR('hkunlp/instructor-xl')
9
  import heapq
10
  from sklearn.metrics.pairwise import cosine_similarity
 
 
 
 
 
 
 
 
 
11
 
12
  def getSimilarity(sentences_a,sentences_b):
13
  embeddings_a = pickle.load(open(os.getcwd()+"/temp.pkl",'rb'))
@@ -15,10 +22,7 @@ def getSimilarity(sentences_a,sentences_b):
15
  similarities = cosine_similarity(embeddings_a,embeddings_b)
16
  return similarities
17
 
18
- import nltk
19
- from nltk.corpus import stopwords
20
- from nltk.tokenize import word_tokenize, sent_tokenize
21
- from nltk.stem import WordNetLemmatizer
22
 
23
  nltk.download('punkt')
24
  nltk.download('stopwords')
@@ -72,7 +76,7 @@ def get_top_k_matches(np_array,k,sentences):
72
  return indices
73
 
74
 
75
- import pandas as pd
76
  df=pd.DataFrame({"Title":dataset['train']['Title'],"Plot":dataset['train']['Overview']})
77
 
78
  def getOutput(text, size=1000):
 
2
  import pickle
3
  import os
4
  from datasets import load_dataset
5
+ from gradio.components import Label
 
6
  from InstructorEmbedding import INSTRUCTOR
 
7
  import heapq
8
  from sklearn.metrics.pairwise import cosine_similarity
9
+ import nltk
10
+ from nltk.corpus import stopwords
11
+ from nltk.tokenize import word_tokenize, sent_tokenize
12
+ from nltk.stem import WordNetLemmatizer
13
+ import pandas as pd
14
+
15
+ dataset = load_dataset("SandipPalit/Movie_Dataset")
16
+
17
+ model = INSTRUCTOR('hkunlp/instructor-xl')
18
 
19
  def getSimilarity(sentences_a,sentences_b):
20
  embeddings_a = pickle.load(open(os.getcwd()+"/temp.pkl",'rb'))
 
22
  similarities = cosine_similarity(embeddings_a,embeddings_b)
23
  return similarities
24
 
25
+
 
 
 
26
 
27
  nltk.download('punkt')
28
  nltk.download('stopwords')
 
76
  return indices
77
 
78
 
79
+
80
  df=pd.DataFrame({"Title":dataset['train']['Title'],"Plot":dataset['train']['Overview']})
81
 
82
  def getOutput(text, size=1000):