Spaces:
Runtime error
Runtime error
| # app.py | |
| import gradio as gr | |
| from sentence_transformers import SentenceTransformer | |
| import numpy as np | |
| import os | |
| from pathlib import Path | |
| import tempfile | |
# Sentence-embedding model, loaded once at import time and shared by every handler
model = SentenceTransformer("all-MiniLM-L6-v2")

# Parallel in-memory stores: process_file appends to all three together, so
# position i in each list refers to the same uploaded document.
documents, embeddings, file_names = [], [], []
def process_file(file):
    """Read an uploaded text file, embed its content, and store it for search.

    Args:
        file: The upload to ingest. Either a plain path (``str`` or
            ``os.PathLike``) or an object exposing the on-disk path as
            ``.name``. ``None`` means nothing was uploaded.

    Returns:
        A status message for display: success (with the running document
        count), a warning when the file has no text, or an error description.
        Failures are reported in the message instead of being raised so the
        UI can show them.
    """
    if file is None:
        return "❌ No file uploaded"

    try:
        # pathlib paths also have a ``.name`` (the basename only), so path-like
        # values must be checked first; anything else is treated as an upload
        # object whose ``.name`` holds the full path.
        if isinstance(file, (str, os.PathLike)):
            path = os.fspath(file)
        else:
            path = file.name
        display_name = os.path.basename(path)

        with open(path, 'r', encoding='utf-8') as f:
            content = f.read()

        # A blank document carries no meaning to embed and would only add
        # noise to the search results, so it is not stored.
        if not content.strip():
            return f"⚠️ File is empty: {display_name}"

        embedding = model.encode(content)
    except Exception as e:
        return f"❌ Error processing file: {str(e)}"

    # Append to all three stores together so their indices stay aligned.
    documents.append(content)
    embeddings.append(embedding)
    file_names.append(display_name)
    return f"✅ Successfully processed: {display_name}\nTotal documents: {len(documents)}"
def _cosine_similarities(query_vec, doc_vecs):
    """Return the cosine similarity of ``query_vec`` against each vector in ``doc_vecs``.

    Pairs where either vector has zero norm score 0.0 instead of NaN.
    """
    matrix = np.asarray(doc_vecs, dtype=float)
    query_vec = np.asarray(query_vec, dtype=float)
    dots = matrix @ query_vec
    norms = np.linalg.norm(matrix, axis=1) * np.linalg.norm(query_vec)
    return np.divide(dots, norms, out=np.zeros_like(dots), where=norms > 0)


def semantic_search(query, top_k=3):
    """Rank the stored documents by cosine similarity to ``query``.

    Args:
        query: Free-text search string. ``None``, empty, or whitespace-only
            input is rejected with a warning message.
        top_k: Maximum number of results to return. Coerced to ``int`` and
            clamped to at least 1.

    Returns:
        A Markdown string with one section per hit (file name, similarity
        score, and a preview of up to 500 characters), sections separated by
        ``---``; or a warning/error message when the search cannot run.
    """
    if not query or not query.strip():
        return "⚠️ Please enter a search query"
    if not documents:
        return "⚠️ No documents uploaded yet. Please upload some files first."

    preview_chars = 500
    try:
        # A UI slider may deliver a float (e.g. 3.0), which is not a valid
        # slice bound, so normalise to a positive int first.
        limit = max(1, int(top_k))

        query_embedding = model.encode(query)
        scores = _cosine_similarities(query_embedding, embeddings)

        # Highest score first.
        ranked = sorted(enumerate(scores), key=lambda pair: pair[1], reverse=True)

        results = []
        for idx, score in ranked[:limit]:
            text = documents[idx]
            # Only mark the preview as truncated when text was actually cut.
            ellipsis = "..." if len(text) > preview_chars else ""
            results.append(f"**File:** {file_names[idx]}\n"
                           f"**Similarity Score:** {score:.3f}\n"
                           f"**Content Preview:**\n{text[:preview_chars]}{ellipsis}\n")
        return "\n---\n".join(results)
    except Exception as e:
        return f"❌ Search error: {str(e)}"
def clear_documents():
    """Remove every stored document, embedding, and file name.

    The three module-level lists are emptied in place, so existing references
    to them stay valid.

    Returns:
        A status message confirming that the stores were cleared.
    """
    documents.clear()
    embeddings.clear()
    file_names.clear()
    return "🗑️ All documents cleared"
# Build the Gradio interface: an upload/status column on the left and a
# wider search column on the right.
with gr.Blocks(title="AI Semantic File Search", theme=gr.themes.Soft()) as app:
    gr.Markdown("# 🔍 AI Semantic File Search")
    gr.Markdown("Upload documents and search through them using AI-powered semantic search!")

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Upload Documents")
            file_input = gr.File(label="Upload Text File", file_types=[".txt", ".md", ".py", ".json"])
            process_btn = gr.Button("📤 Process File", variant="primary")
            status_output = gr.Textbox(label="Status", interactive=False, lines=2)
            clear_btn = gr.Button("🗑️ Clear All", variant="secondary")

        with gr.Column(scale=2):
            gr.Markdown("### Search Documents")
            query_input = gr.Textbox(
                label="Search Query",
                placeholder="Enter your search query...",
                lines=2
            )
            top_k_slider = gr.Slider(1, 10, value=3, step=1, label="Number of Results")
            search_btn = gr.Button("🔍 Search", variant="primary")
            results_output = gr.Markdown(label="Search Results")

    # Event handlers, wired up while the Blocks context is still open.
    # Upload and clear both report through the same status box.
    process_btn.click(
        fn=process_file,
        inputs=[file_input],
        outputs=[status_output]
    )
    search_btn.click(
        fn=semantic_search,
        inputs=[query_input, top_k_slider],
        outputs=[results_output]
    )
    clear_btn.click(
        fn=clear_documents,
        outputs=[status_output]
    )

# Start the server only when run as a script, not when imported.
if __name__ == "__main__":
    app.launch()