# app.py
"""Gradio demo: upload text files and search them by semantic similarity.

Each uploaded file is embedded as a whole with a SentenceTransformer model and
kept in process memory. A query is embedded the same way and the stored
documents are ranked by cosine similarity to it.
"""

# NOTE(review): `tempfile` and `Path` are not used anywhere in this file; they
# are kept so that nothing importing them from here breaks.
import os
import tempfile
from pathlib import Path

import gradio as gr
import numpy as np
from sentence_transformers import SentenceTransformer

# Number of leading characters of a document shown in each search result.
PREVIEW_CHARS = 500

# Initialize the embedding model
model = SentenceTransformer('all-MiniLM-L6-v2')

# In-memory storage for documents and embeddings.
# The three lists are parallel: index i in each refers to the same upload.
documents = []
embeddings = []
file_names = []


def process_file(file):
    """Embed an uploaded file and add it to the in-memory store.

    Args:
        file: The value delivered by the ``gr.File`` component, or ``None``
            when nothing was uploaded. Either an object exposing the file
            path as ``.name`` or a plain path string is accepted.

    Returns:
        A status message for the UI. Failures (unreadable file, non-UTF-8
        content, encoding errors) are reported in the message, not raised.
    """
    if file is None:
        return "❌ No file uploaded"

    try:
        # NOTE(review): gr.File appears to hand over either a wrapper with a
        # `.name` path or a bare path string depending on Gradio version and
        # `type` setting - confirm. Both shapes are handled here.
        path = getattr(file, "name", file)
        display_name = os.path.basename(path)

        # Read file content
        with open(path, 'r', encoding='utf-8') as f:
            content = f.read()

        # Generate embedding
        embedding = model.encode(content)

        # Store document; the three appends must stay together so the
        # parallel lists never get out of step.
        documents.append(content)
        embeddings.append(embedding)
        file_names.append(display_name)

        return f"✅ Successfully processed: {display_name}\nTotal documents: {len(documents)}"
    except Exception as e:
        # UI boundary: surface the problem to the user instead of crashing.
        return f"❌ Error processing file: {str(e)}"


def semantic_search(query, top_k=3):
    """Rank the stored documents by cosine similarity to *query*.

    Args:
        query: Free-text search query. An empty value yields a warning.
        top_k: Maximum number of results to return. Floats are truncated to
            int and values below 1 are treated as 1.

    Returns:
        A Markdown string with one section per hit (file name, similarity
        score, content preview) separated by horizontal rules, or a
        warning/error message when the search cannot be performed.
    """
    if not query:
        return "⚠️ Please enter a search query"

    if not documents:
        return "⚠️ No documents uploaded yet. Please upload some files first."

    try:
        # A slider value can arrive as a float, which is not a valid slice
        # bound; normalise it once up front.
        limit = max(1, int(top_k))

        # Generate query embedding
        query_embedding = np.asarray(model.encode(query), dtype=float)
        doc_matrix = np.vstack(embeddings).astype(float)

        # Cosine similarity of the query against every document at once.
        dots = doc_matrix @ query_embedding
        norms = np.linalg.norm(doc_matrix, axis=1) * np.linalg.norm(query_embedding)

        # A zero-length vector has no direction; score it 0 instead of
        # dividing by zero and producing NaN.
        scores = np.zeros_like(dots)
        nonzero = norms > 0
        scores[nonzero] = dots[nonzero] / norms[nonzero]

        # Highest score first; stable sort keeps upload order among ties.
        ranking = np.argsort(-scores, kind="stable")[:limit]

        # Build results
        results = []
        for idx in ranking:
            text = documents[idx]
            preview = text[:PREVIEW_CHARS]
            # Only signal truncation when something was actually cut off.
            ellipsis = "..." if len(text) > PREVIEW_CHARS else ""
            results.append(
                f"**File:** {file_names[idx]}\n"
                f"**Similarity Score:** {scores[idx]:.3f}\n"
                f"**Content Preview:**\n{preview}{ellipsis}\n"
            )

        return "\n---\n".join(results)
    except Exception as e:
        # UI boundary: surface the problem to the user instead of crashing.
        return f"❌ Search error: {str(e)}"


def clear_documents():
    """Empty the in-memory store and return a status message for the UI.

    The lists are cleared in place, so the module-level names keep referring
    to the same list objects.
    """
    documents.clear()
    embeddings.clear()
    file_names.clear()
    return "🗑️ All documents cleared"


# Create the Gradio interface
with gr.Blocks(title="AI Semantic File Search", theme=gr.themes.Soft()) as app:
    gr.Markdown("# 🔍 AI Semantic File Search")
    gr.Markdown("Upload documents and search through them using AI-powered semantic search!")

    with gr.Row():
        # Left column: upload and store management
        with gr.Column(scale=1):
            gr.Markdown("### Upload Documents")
            file_input = gr.File(
                label="Upload Text File",
                file_types=[".txt", ".md", ".py", ".json"],
            )
            process_btn = gr.Button("📤 Process File", variant="primary")
            status_output = gr.Textbox(label="Status", interactive=False, lines=2)
            clear_btn = gr.Button("🗑️ Clear All", variant="secondary")

        # Right column: querying and results
        with gr.Column(scale=2):
            gr.Markdown("### Search Documents")
            query_input = gr.Textbox(
                label="Search Query",
                placeholder="Enter your search query...",
                lines=2
            )
            top_k_slider = gr.Slider(1, 10, value=3, step=1, label="Number of Results")
            search_btn = gr.Button("🔎 Search", variant="primary")
            results_output = gr.Markdown(label="Search Results")

    # Event handlers
    process_btn.click(
        fn=process_file,
        inputs=[file_input],
        outputs=[status_output]
    )

    search_btn.click(
        fn=semantic_search,
        inputs=[query_input, top_k_slider],
        outputs=[results_output]
    )

    # Clearing reports through the same status box as uploads.
    clear_btn.click(
        fn=clear_documents,
        outputs=[status_output]
    )


if __name__ == "__main__":
    app.launch()