# imports
import gradio as gr
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Load the Sentence Transformer model once and store it in a variable
model = SentenceTransformer('all-MiniLM-L6-v2')  # You can choose a different model if preferred


# Function to process the uploaded file and find top 5 matching notes
def find_matching_notes(uploaded_file, user_input):
    """Return the five notes from an uploaded CSV most similar to *user_input*.

    Each row's 'Notes' and 'Section' values are joined into one text, embedded
    with the module-level sentence-transformer ``model`` together with the
    query, and ranked by cosine similarity to the query embedding.

    Args:
        uploaded_file: The value delivered by the Gradio file input. Either an
            object whose ``name`` attribute is the path of the uploaded file,
            a plain path string, or ``None`` when nothing was uploaded.
        user_input: The query text to match against the notes.

    Returns:
        A string with one formatted entry (Notes / Source / Section) per match,
        best match first, or a human-readable message when the file is
        missing, unreadable, lacks the required columns, has no rows, or the
        query is blank.
    """
    invalid_file_message = "Please upload a valid CSV file."
    if uploaded_file is None:
        return invalid_file_message

    # Accept both an upload wrapper exposing the path as `.name` and a bare
    # path string, so the function does not depend on how the file is passed.
    csv_path = getattr(uploaded_file, "name", uploaded_file)
    try:
        df = pd.read_csv(csv_path)
    except (pd.errors.EmptyDataError, pd.errors.ParserError,
            UnicodeDecodeError, OSError):
        # An unreadable or non-CSV upload is a user error, not a crash.
        return invalid_file_message

    # Ensure the necessary columns are present
    if not {'Source', 'Section', 'Notes'}.issubset(df.columns):
        return "The uploaded file must contain 'Source', 'Section', and 'Notes' columns."

    if df.empty:
        return "The uploaded file does not contain any notes to search."

    if not user_input or not user_input.strip():
        return "Please enter some text to match against the notes."

    # Replace missing values by assigning the filled column back: a chained
    # `df[col].fillna(..., inplace=True)` may operate on a temporary copy and
    # leave the frame untouched.
    for column in ('Notes', 'Section'):
        df[column] = df[column].fillna('')

    # Cast to str so columns that pandas parsed as numbers can still be joined.
    combined_texts = df['Notes'].astype(str) + ' ' + df['Section'].astype(str)

    # Encode all notes and the query in one batch; the query is the last row.
    # NumPy output is requested because scikit-learn works on arrays, not on
    # torch tensors (which may live on a GPU).
    all_texts = combined_texts.tolist() + [user_input]
    embeddings = model.encode(all_texts, convert_to_numpy=True)
    query_embedding = embeddings[-1:]  # slice keeps the 2-D shape (1, dim)
    note_embeddings = embeddings[:-1]

    # Compute cosine similarity of the query against every note
    similarities = cosine_similarity(query_embedding, note_embeddings)[0]

    # Indices of the (up to) five highest scores, best match first
    top_indices = similarities.argsort()[-5:][::-1]

    # Prepare the results in a readable format
    results = df.iloc[top_indices][['Notes', 'Source', 'Section']]
    formatted_results = [
        f"**Notes:** {row['Notes']}\n"
        f"**Source:** {row['Source']}\n"
        f"**Section:** {row['Section']}\n"
        "-------------------------------------\n"
        for _, row in results.iterrows()
    ]
    return "\n".join(formatted_results)
# Build the UI components first (inputs in order, then the output box),
# then wire them to the matching function.
_notes_upload = gr.File(label="Upload Research Notes (CSV)")
_query_box = gr.Textbox(
    label="Enter your text here",
    placeholder="Type your content...",
)
_results_box = gr.Textbox(
    label="Top 5 Matching Entries",
    lines=15,
    placeholder="Results will be displayed here...",
)

# Gradio interface: CSV upload + query text in, formatted matches out
iface = gr.Interface(
    fn=find_matching_notes,
    inputs=[_notes_upload, _query_box],
    outputs=_results_box,
    title="Research Notes Matcher",
    description=(
        "Upload a CSV file with 'Source', 'Section', and 'Notes'. "
        "Enter your text to find the top 5 matching notes. "
        "For documentation check the Readme file in the files section"
    ),
)

# Start serving the app
iface.launch()