Semantic-Search / app.py
JackSparrow89's picture
Update app.py
87b8b15 verified
# app.py
import gradio as gr
from sentence_transformers import SentenceTransformer
import numpy as np
import os
from pathlib import Path
import tempfile
# Sentence-embedding model shared by every handler; created once at import.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Parallel in-memory stores: position i in each list describes the same
# upload (raw text, its embedding vector, and the file's base name).
documents, embeddings, file_names = [], [], []
def process_file(file):
    """Read an uploaded text file, embed it, and add it to the in-memory index.

    Args:
        file: The upload handed over by the UI. Either a plain path
            (``str`` / ``os.PathLike``) or an object exposing the on-disk
            path as ``.name``. ``None`` means nothing was uploaded.

    Returns:
        A short status message for display. Failures (unreadable file,
        non-UTF-8 content, embedding errors) are reported in the message
        rather than raised.
    """
    if file is None:
        return "❌ No file uploaded"

    try:
        # Accept a bare path as well as a wrapper object carrying ``.name``.
        # The isinstance check comes first because ``pathlib.Path.name`` is
        # only the final path component, not the full path.
        path = file if isinstance(file, (str, os.PathLike)) else file.name
        display_name = os.path.basename(path)

        with open(path, "r", encoding="utf-8") as handle:
            content = handle.read()

        # Nothing to embed or preview; keep blank files out of the index.
        if not content.strip():
            return f"⚠️ File is empty: {display_name}"

        # The whole document is embedded as a single vector.
        # NOTE(review): long files may be truncated by the model's input
        # length limit, so only the beginning would be searchable - confirm.
        embedding = model.encode(content)

        # Append to all three stores together so their indices stay aligned.
        documents.append(content)
        embeddings.append(embedding)
        file_names.append(display_name)
        return (
            f"✅ Successfully processed: {display_name}\n"
            f"Total documents: {len(documents)}"
        )
    except Exception as e:
        # UI boundary: show the failure in the status box instead of crashing
        # the event handler.
        return f"❌ Error processing file: {str(e)}"
_PREVIEW_CHARS = 500  # maximum number of document characters shown per result


def _cosine_scores(query_vec, doc_vecs):
    """Return the cosine similarity between ``query_vec`` and each document vector.

    ``doc_vecs`` is a sequence of equal-length 1-D vectors. Pairs where
    either vector has zero norm score 0.0 instead of producing NaN.
    """
    query_arr = np.asarray(query_vec, dtype=float)
    doc_matrix = np.asarray(doc_vecs, dtype=float)
    dots = doc_matrix @ query_arr
    denom = np.linalg.norm(doc_matrix, axis=1) * np.linalg.norm(query_arr)

    scores = np.zeros(len(dots), dtype=float)
    usable = denom > 0
    scores[usable] = dots[usable] / denom[usable]
    return scores


def semantic_search(query, top_k=3):
    """Rank the uploaded documents by semantic similarity to ``query``.

    Args:
        query: Free-text search query. Empty or whitespace-only input is
            rejected with a prompt message.
        top_k: Maximum number of results to return. Coerced to an integer
            (UI sliders may deliver floats) and clamped to at least 1.

    Returns:
        A Markdown string with one section per hit (file name, similarity
        score, and a preview of up to 500 characters), separated by
        horizontal rules. Returns a warning or error message when there is
        nothing to search or the search fails.
    """
    if not query or not query.strip():
        return "⚠️ Please enter a search query"
    if not documents:
        return "⚠️ No documents uploaded yet. Please upload some files first."

    try:
        limit = max(1, int(top_k))

        query_embedding = model.encode(query)
        scores = _cosine_scores(query_embedding, embeddings)

        # Highest score first; a stable sort keeps upload order among ties.
        ranked = np.argsort(-scores, kind="stable")[:limit]

        results = []
        for idx in ranked:
            text = documents[idx]
            preview = text[:_PREVIEW_CHARS]
            # Only mark the preview as cut off when it actually was.
            if len(text) > _PREVIEW_CHARS:
                preview += "..."
            results.append(
                f"**File:** {file_names[idx]}\n"
                f"**Similarity Score:** {scores[idx]:.3f}\n"
                f"**Content Preview:**\n{preview}\n"
            )
        return "\n---\n".join(results)
    except Exception as e:
        # UI boundary: report the failure in the results pane.
        return f"❌ Search error: {str(e)}"
def clear_documents():
    """Remove every uploaded document from the in-memory index.

    Returns:
        A status message confirming the index was emptied.
    """
    # Clear in place rather than rebinding, so every function that refers to
    # these module-level lists keeps seeing the same (now empty) objects.
    for store in (documents, embeddings, file_names):
        store.clear()
    return "🗑️ All documents cleared"
# Build the Gradio interface: upload/status controls in the narrow left
# column, query controls and results in the wider right column.
with gr.Blocks(title="AI Semantic File Search", theme=gr.themes.Soft()) as app:
    gr.Markdown("# 🔍 AI Semantic File Search")
    gr.Markdown("Upload documents and search through them using AI-powered semantic search!")

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Upload Documents")
            file_input = gr.File(
                label="Upload Text File",
                file_types=[".txt", ".md", ".py", ".json"],
            )
            process_btn = gr.Button("📤 Process File", variant="primary")
            status_output = gr.Textbox(label="Status", interactive=False, lines=2)
            clear_btn = gr.Button("🗑️ Clear All", variant="secondary")

        with gr.Column(scale=2):
            gr.Markdown("### Search Documents")
            query_input = gr.Textbox(
                label="Search Query",
                placeholder="Enter your search query...",
                lines=2,
            )
            top_k_slider = gr.Slider(1, 10, value=3, step=1, label="Number of Results")
            search_btn = gr.Button("🔎 Search", variant="primary")
            results_output = gr.Markdown(label="Search Results")

    # Event handlers: each button calls one module-level function and routes
    # its returned message to the matching output component.
    process_btn.click(
        fn=process_file,
        inputs=[file_input],
        outputs=[status_output],
    )
    search_btn.click(
        fn=semantic_search,
        inputs=[query_input, top_k_slider],
        outputs=[results_output],
    )
    clear_btn.click(
        fn=clear_documents,
        outputs=[status_output],
    )

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    app.launch()