JackSparrow89 committed on
Commit
87b8b15
·
verified ·
1 Parent(s): 1e54038

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +126 -0
app.py CHANGED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import gradio as gr
3
+ from sentence_transformers import SentenceTransformer
4
+ import numpy as np
5
+ import os
6
+ from pathlib import Path
7
+ import tempfile
8
+
9
# Sentence-embedding model used for both indexing and querying; it is created
# once, when the module is imported.
model = SentenceTransformer("all-MiniLM-L6-v2")

# In-memory index shared by every handler below. The three lists are kept
# parallel: position i in each one describes the same uploaded file.
documents = []   # full text of each file
embeddings = []  # vector returned by model.encode for that text
file_names = []  # base name of the uploaded file
16
+
17
def process_file(file):
    """Read an uploaded text file, embed it, and add it to the in-memory index.

    Args:
        file: The value delivered by the file-upload input. Either a
            filesystem path (``str`` or ``os.PathLike``) or an object that
            exposes the path as ``.name``. ``None`` means nothing was uploaded.

    Returns:
        A status message for the UI: success (with the running document
        count), a notice that an empty file was skipped, or the error text.
        Failures are reported in the message rather than raised.
    """
    if file is None:
        return "❌ No file uploaded"

    try:
        # Check for a real path first: pathlib objects also have a `.name`
        # attribute, but there it is only the final path component.
        if isinstance(file, (str, os.PathLike)):
            path = file
        else:
            path = file.name
        display_name = os.path.basename(path)

        with open(path, "r", encoding="utf-8") as f:
            content = f.read()

        # A blank document has no meaningful embedding and would only add
        # noise to search results, so it is not indexed.
        if not content.strip():
            return f"⚠️ Skipped empty file: {display_name}"

        embedding = model.encode(content)
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        return f"❌ Error processing file: {str(e)}"

    # Append only after everything succeeded so the three parallel lists
    # can never get out of step with one another.
    documents.append(content)
    embeddings.append(embedding)
    file_names.append(display_name)

    return f"✅ Successfully processed: {display_name}\nTotal documents: {len(documents)}"
39
+
40
def semantic_search(query, top_k=3):
    """Rank the uploaded documents by cosine similarity to a query.

    Args:
        query: Free-text search query. ``None``, empty, or whitespace-only
            input is rejected with a warning message.
        top_k: Maximum number of results to return. Any number is accepted
            (a UI slider may deliver a float); it is truncated to an int and
            raised to at least 1.

    Returns:
        A Markdown string with one section per hit (file name, similarity
        score, and a preview of up to 500 characters), separated by
        horizontal rules. If the query is empty, nothing has been uploaded,
        or the search fails, a single warning/error line is returned instead.
    """
    if not query or not query.strip():
        return "⚠️ Please enter a search query"

    if not documents:
        return "⚠️ No documents uploaded yet. Please upload some files first."

    preview_chars = 500

    try:
        # List slicing requires an int, and asking for zero results is
        # never what the user meant.
        limit = max(1, int(top_k))

        query_embedding = np.asarray(model.encode(query))
        doc_matrix = np.stack(embeddings)

        # Cosine similarity of the query against every document in one pass.
        dots = doc_matrix @ query_embedding
        norms = np.linalg.norm(doc_matrix, axis=1) * np.linalg.norm(query_embedding)
        # A zero-length vector has no direction; score it 0 instead of
        # dividing by zero and ranking on NaN.
        has_norm = norms > 0
        scores = np.where(has_norm, dots / np.where(has_norm, norms, 1.0), 0.0)

        # Highest score first; the stable sort keeps upload order among ties.
        ranking = np.argsort(-scores, kind="stable")[:limit]

        results = []
        for idx in ranking:
            text = documents[idx]
            preview = text[:preview_chars]
            # Only mark the preview as truncated when it really is.
            if len(text) > preview_chars:
                preview += "..."
            results.append(
                f"**File:** {file_names[idx]}\n"
                f"**Similarity Score:** {scores[idx]:.3f}\n"
                f"**Content Preview:**\n{preview}\n"
            )

        return "\n---\n".join(results)

    except Exception as e:
        # UI boundary: surface the failure as a message instead of crashing.
        return f"❌ Search error: {str(e)}"
75
+
76
def clear_documents():
    """Remove every uploaded document from the in-memory index.

    The three parallel lists are emptied in place, so any other reference
    to them sees the cleared state as well.

    Returns:
        A status message for the UI.
    """
    for store in (documents, embeddings, file_names):
        store.clear()
    return "🗑️ All documents cleared"
82
+
83
+ # Create the Gradio interface
84
# Build the UI: the left column handles uploads and index maintenance, the
# right column handles querying. `app` is the object launched at the bottom.
with gr.Blocks(title="AI Semantic File Search", theme=gr.themes.Soft()) as app:
    gr.Markdown("# 🔍 AI Semantic File Search")
    gr.Markdown("Upload documents and search through them using AI-powered semantic search!")

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Upload Documents")
            file_input = gr.File(label="Upload Text File", file_types=[".txt", ".md", ".py", ".json"])
            process_btn = gr.Button("📤 Process File", variant="primary")
            status_output = gr.Textbox(label="Status", interactive=False, lines=2)
            clear_btn = gr.Button("🗑️ Clear All", variant="secondary")

        with gr.Column(scale=2):
            gr.Markdown("### Search Documents")
            query_input = gr.Textbox(
                label="Search Query",
                placeholder="Enter your search query...",
                lines=2
            )
            top_k_slider = gr.Slider(1, 10, value=3, step=1, label="Number of Results")
            search_btn = gr.Button("🔎 Search", variant="primary")
            results_output = gr.Markdown(label="Search Results")

    # Event handlers: each button calls one module-level function and writes
    # the returned message into the listed output component.
    process_btn.click(
        fn=process_file,
        inputs=[file_input],
        outputs=[status_output]
    )

    search_btn.click(
        fn=semantic_search,
        inputs=[query_input, top_k_slider],
        outputs=[results_output]
    )

    # Clearing takes no inputs; it reuses the status box for its confirmation.
    clear_btn.click(
        fn=clear_documents,
        outputs=[status_output]
    )

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    app.launch()