# CitingLLM / app.py
# fortuala's picture
# Update app.py
# 1c0b0c8 verified
# imports
import gradio as gr
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
# Load the Sentence Transformer model once and store it in a variable
model = SentenceTransformer('all-MiniLM-L6-v2') # You can choose a different model if preferred
# Function to process the uploaded file and find top 5 matching notes
def find_matching_notes(uploaded_file, user_input: str) -> str:
    """Return the notes in an uploaded CSV that best match ``user_input``.

    The 'Notes' and 'Section' text of every row is joined, embedded together
    with the query by the module-level Sentence Transformer ``model``, and
    ranked by cosine similarity to the query.

    Args:
        uploaded_file: Value received from the file-upload input. Either an
            object exposing the file path as ``.name`` or the path string
            itself; ``None`` when nothing was uploaded.
        user_input: Free text to compare against the notes.

    Returns:
        One "Notes / Source / Section" entry per match, best match first and
        at most five entries, or a short message explaining why no search
        could be run (no file, unreadable file, missing columns, blank query,
        or a file without rows).
    """
    if uploaded_file is None:
        return "Please upload a valid CSV file."

    # The upload may arrive as a temp-file wrapper (path in .name) or as a
    # plain path string; a str has no .name, so it falls through unchanged.
    csv_path = getattr(uploaded_file, "name", uploaded_file)
    try:
        df = pd.read_csv(csv_path)
    except (
        pd.errors.EmptyDataError,
        pd.errors.ParserError,
        UnicodeDecodeError,
        OSError,
    ):
        return "Please upload a valid CSV file."

    # Ensure the necessary columns are present
    if not {'Source', 'Section', 'Notes'}.issubset(df.columns):
        return "The uploaded file must contain 'Source', 'Section', and 'Notes' columns."

    if user_input is None or not user_input.strip():
        return "Please enter some text to match against the notes."

    # With zero rows there is nothing to compare the query against, and the
    # similarity computation would raise on an empty matrix.
    if df.empty:
        return "The uploaded file does not contain any notes."

    # Assign the filled columns back: fillna(inplace=True) on a column
    # selection is a chained assignment that pandas deprecates and that has
    # no effect once Copy-on-Write is enabled.
    df['Notes'] = df['Notes'].fillna('')
    df['Section'] = df['Section'].fillna('')

    # astype(str) keeps the concatenation from failing when a column was
    # parsed as numeric.
    combined_texts = (
        df['Notes'].astype(str) + ' ' + df['Section'].astype(str)
    ).tolist()

    # Ask for NumPy embeddings: scikit-learn consumes them directly, whereas
    # torch tensors living on a GPU cannot be converted implicitly.
    embeddings = model.encode(combined_texts + [user_input], convert_to_numpy=True)

    # The query was appended last; the slices keep both operands 2-D.
    similarities = cosine_similarity(embeddings[-1:], embeddings[:-1])[0]

    # Indices of the (up to) five highest scores, best first.
    top_indices = similarities.argsort()[-5:][::-1]
    top_rows = df.iloc[top_indices][['Notes', 'Source', 'Section']]

    entries = [
        f"**Notes:** {notes}\n"
        f"**Source:** {source}\n"
        f"**Section:** {section}\n"
        "-------------------------------------\n"
        for notes, source, section in top_rows.itertuples(index=False, name=None)
    ]
    return "\n".join(entries)
# Build the UI components first, then wire them into a single Interface that
# calls find_matching_notes with the uploaded file and the typed query.
_notes_upload = gr.File(label="Upload Research Notes (CSV)")
_query_box = gr.Textbox(
    label="Enter your text here",
    placeholder="Type your content...",
)
_results_box = gr.Textbox(
    label="Top 5 Matching Entries",
    lines=15,
    placeholder="Results will be displayed here...",
)

iface = gr.Interface(
    fn=find_matching_notes,
    inputs=[_notes_upload, _query_box],
    outputs=_results_box,
    title="Research Notes Matcher",
    description=(
        "Upload a CSV file with 'Source', 'Section', and 'Notes'. "
        "Enter your text to find the top 5 matching notes. "
        "For documentation check the Readme file in the files section"
    ),
)

# Start serving the app
iface.launch()