Update app.py
Browse files
app.py
CHANGED
|
@@ -1,8 +1,11 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
| 3 |
-
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 4 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 5 |
|
|
|
|
|
|
|
|
|
|
| 6 |
# Function to process the uploaded file and find top 5 matching notes
|
| 7 |
def find_matching_notes(uploaded_file, user_input):
|
| 8 |
# Read the uploaded CSV file
|
|
@@ -22,13 +25,12 @@ def find_matching_notes(uploaded_file, user_input):
|
|
| 22 |
# Combine 'Notes' and 'Section' for processing
|
| 23 |
df['Combined'] = df['Notes'] + ' ' + df['Section']
|
| 24 |
|
| 25 |
-
# Vectorize the combined text using TF-IDF
|
| 26 |
-
vectorizer = TfidfVectorizer()
|
| 27 |
all_texts = df['Combined'].tolist() + [user_input]
|
| 28 |
-
tfidf_matrix = vectorizer.fit_transform(all_texts)
|
| 29 |
|
| 30 |
# Compute cosine similarity
|
| 31 |
-
cosine_similarities = cosine_similarity(tfidf_matrix[-1], tfidf_matrix[:-1])
|
| 32 |
|
| 33 |
# Get the top 5 indices of the most similar entries
|
| 34 |
top_indices = cosine_similarities[0].argsort()[-5:][::-1]
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
| 3 |
+
from sentence_transformers import SentenceTransformer
|
| 4 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 5 |
|
| 6 |
+
# Load the Sentence Transformer model
|
| 7 |
+
model = SentenceTransformer('all-MiniLM-L6-v2') # You can choose a different model if preferred
|
| 8 |
+
|
| 9 |
# Function to process the uploaded file and find top 5 matching notes
|
| 10 |
def find_matching_notes(uploaded_file, user_input):
|
| 11 |
# Read the uploaded CSV file
|
|
|
|
| 25 |
# Combine 'Notes' and 'Section' for processing
|
| 26 |
df['Combined'] = df['Notes'] + ' ' + df['Section']
|
| 27 |
|
| 28 |
+
# Encode the combined text using the Sentence Transformer
|
|
|
|
| 29 |
all_texts = df['Combined'].tolist() + [user_input]
|
| 30 |
+
embeddings = model.encode(all_texts, convert_to_tensor=True)
|
| 31 |
|
| 32 |
# Compute cosine similarity
|
| 33 |
+
cosine_similarities = cosine_similarity(embeddings[-1].unsqueeze(0), embeddings[:-1])
|
| 34 |
|
| 35 |
# Get the top 5 indices of the most similar entries
|
| 36 |
top_indices = cosine_similarities[0].argsort()[-5:][::-1]
|