rbbist committed on
Commit
f2fcce2
·
verified ·
1 Parent(s): 5f5c046

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +280 -0
app.py ADDED
@@ -0,0 +1,280 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ from semantic_search import CVSemanticSearch
4
+ import logging
5
+
6
# Set up logging: configure the root logger at INFO level and create a
# module-level logger named after this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize the semantic search system. A single CVSemanticSearch instance
# is created at import time and used by every handler function in this file.
cv_search = CVSemanticSearch()
12
+
13
def upload_cvs(files):
    """
    Handle CV uploads from Gradio.

    Each upload is read from disk as raw bytes and handed to
    ``cv_search.add_cv_to_database`` together with its base filename.
    A failure on one file is logged and does not prevent the remaining
    files from being processed.

    Args:
        files: Uploaded files from Gradio. Each item may be a filesystem
            path (``str`` / ``os.PathLike``) or an object exposing the
            path as ``.name``. ``None`` or an empty list means nothing
            was uploaded.

    Returns:
        Status message with the number of files processed and the
        current database contents.
    """
    if not files:
        return "No files uploaded."

    successful = 0
    total = len(files)

    for file in files:
        # Resolve the path before entering the ``try`` so the error handler
        # never has to dereference ``file.name`` itself; doing that for a
        # plain-string upload would raise again and abort the whole batch.
        if isinstance(file, (str, os.PathLike)):
            path = os.fspath(file)
        else:
            path = getattr(file, "name", None)

        if path is None:
            logger.error("Unsupported upload object: %r", file)
            continue

        # Get filename from path
        filename = os.path.basename(path)

        try:
            # Read file content
            with open(path, 'rb') as f:
                file_content = f.read()

            # Add to database
            if cv_search.add_cv_to_database(file_content, filename):
                successful += 1
                logger.info("Successfully uploaded: %s", filename)
            else:
                logger.error("Failed to upload: %s", filename)

        except Exception as e:
            # Deliberately broad: one unreadable or unparsable CV must not
            # stop the rest of the batch. ``exception`` keeps the traceback.
            logger.exception("Error processing file %s: %s", path, e)

    db_info = cv_search.get_database_info()

    summary_lines = [
        "",
        "Upload Complete!",
        f"βœ… Successfully processed: {successful}/{total} files",
        f"πŸ“Š Database now contains: {db_info['unique_cvs']} CVs ({db_info['total_chunks']} chunks)",
        "",
        f"CVs in database: {', '.join(db_info['cv_filenames'])}",
        "",
    ]
    return "\n".join(summary_lines)
57
+
58
def search_matching_cvs(job_description, num_results):
    """
    Search for CVs matching the job description.

    Args:
        job_description: Job description text. ``None``, an empty string
            or a whitespace-only string is treated as "nothing entered".
        num_results: Number of results to return. Coerced to ``int``
            before being passed to the search backend as ``top_k``.

    Returns:
        Markdown-formatted search results, or a short guidance message
        when there is no query, no CVs in the database, or no match.
    """
    # Guard against None as well as blank text so the handler never raises
    # on ``.strip()``.
    if not job_description or not job_description.strip():
        return "Please enter a job description."

    # Get database info
    db_info = cv_search.get_database_info()

    if db_info['unique_cvs'] == 0:
        return "No CVs in database. Please upload some CV PDFs first."

    # Perform search
    results = cv_search.search_cvs(job_description, top_k=int(num_results))

    if not results:
        return "No matching CVs found."

    # Format results: collect the pieces and join once instead of growing
    # a string with repeated ``+=``.
    sections = [f"🎯 **Top {len(results)} Matching CVs:**\n\n"]

    for i, cv in enumerate(results, 1):
        similarity_percentage = cv['weighted_score'] * 100

        sections.append(
            "\n"
            f"**{i}. {cv['filename']}**\n"
            f"- **Match Score**: {similarity_percentage:.1f}%\n"
            f"- **Max Similarity**: {cv['max_similarity']*100:.1f}%\n"
            f"- **Avg Similarity**: {cv['avg_similarity']*100:.1f}%\n"
            f"- **Chunks Analyzed**: {cv['chunk_count']}\n"
            f"- **Best Match Preview**: {cv['best_match_text']}\n"
            "\n"
            "---\n"
        )

    return "".join(sections)
102
+
103
def get_database_status():
    """
    Report what the CV database currently holds.

    Returns:
        A Markdown string: either an "empty database" hint, or a summary
        listing the CV count, the chunk count and the stored filenames.
    """
    info = cv_search.get_database_info()

    # Nothing stored yet: short-circuit with the onboarding hint.
    if info['unique_cvs'] == 0:
        return "πŸ“ Database is empty. Upload some CV PDFs to get started!"

    return (
        "\n"
        "πŸ“Š **Database Status:**\n"
        f"- **Total CVs**: {info['unique_cvs']}\n"
        f"- **Total Chunks**: {info['total_chunks']}\n"
        f"- **CVs in Database**: {', '.join(info['cv_filenames'])}\n"
    )
121
+
122
def clear_database():
    """
    Remove everything from the CV database.

    Returns:
        A success message when ``cv_search.clear_database()`` reports a
        truthy result, otherwise an error message.
    """
    cleared = cv_search.clear_database()
    return (
        "πŸ—‘οΈ Database cleared successfully!"
        if cleared
        else "❌ Error clearing database."
    )
133
+
134
+ # Create Gradio interface
135
def create_interface():
    """
    Create and return the Gradio interface.

    Layout: an upload column and a search column side by side, a results
    row beneath them, then a database status panel with refresh and clear
    buttons, and finally a usage guide.

    Returns:
        The ``gr.Blocks`` application, ready to be launched.
    """

    def _clear_and_report():
        # Show the outcome of the clear together with the fresh status in
        # one update. Writing the outcome to the status panel and then
        # immediately refreshing that same panel would overwrite the
        # outcome (including a failure message) before anyone can read it.
        return f"{clear_database()}\n\n{get_database_status()}"

    with gr.Blocks(
        title="CV Semantic Search",
        theme=gr.themes.Soft(),
        css="""
        .container { max-width: 1200px; margin: auto; }
        .upload-section { background: #f8f9fa; padding: 20px; border-radius: 10px; margin: 10px 0; }
        .search-section { background: #e8f5e8; padding: 20px; border-radius: 10px; margin: 10px 0; }
        .status-section { background: #fff3cd; padding: 15px; border-radius: 8px; margin: 10px 0; }
        """
    ) as demo:

        gr.Markdown("""
        # πŸ” CV Semantic Search System

        Upload CV PDFs and search for the best matches based on job descriptions using AI-powered semantic search.
        """)

        with gr.Row():
            with gr.Column(scale=1):
                # Upload Section
                with gr.Group():
                    gr.Markdown("## πŸ“ Upload CVs")

                    cv_files = gr.File(
                        label="Upload CV PDFs",
                        file_count="multiple",
                        file_types=[".pdf"],
                        elem_classes=["upload-section"]
                    )

                    upload_btn = gr.Button(
                        "Upload CVs to Database",
                        variant="primary",
                        size="lg"
                    )

                    upload_output = gr.Markdown(
                        "Upload CVs to build your searchable database.",
                        elem_classes=["status-section"]
                    )

            with gr.Column(scale=1):
                # Search Section
                with gr.Group():
                    gr.Markdown("## 🎯 Search CVs")

                    job_description = gr.Textbox(
                        label="Job Description",
                        placeholder="Enter the job description here...\n\nExample: Looking for a senior software engineer with 5+ years experience in Python, React, and cloud technologies. Strong background in microservices and API development required.",
                        lines=6,
                        elem_classes=["search-section"]
                    )

                    num_results = gr.Slider(
                        label="Number of Results",
                        minimum=1,
                        maximum=10,
                        value=5,
                        step=1
                    )

                    search_btn = gr.Button(
                        "Search Matching CVs",
                        variant="secondary",
                        size="lg"
                    )

        # Search Results
        with gr.Row():
            search_output = gr.Markdown(
                "Enter a job description and click search to find matching CVs.",
                elem_classes=["search-section"]
            )

        # Database Management
        with gr.Row():
            with gr.Column(scale=2):
                # Initial text is computed once, when the interface is built.
                status_output = gr.Markdown(
                    get_database_status(),
                    elem_classes=["status-section"]
                )

            with gr.Column(scale=1):
                with gr.Group():
                    refresh_btn = gr.Button("πŸ”„ Refresh Status", size="sm")
                    clear_btn = gr.Button("πŸ—‘οΈ Clear Database", size="sm", variant="stop")

        # Event handlers
        # Upload, then refresh the status panel once the upload has finished.
        upload_btn.click(
            fn=upload_cvs,
            inputs=[cv_files],
            outputs=[upload_output]
        ).then(
            fn=get_database_status,
            outputs=[status_output]
        )

        search_btn.click(
            fn=search_matching_cvs,
            inputs=[job_description, num_results],
            outputs=[search_output]
        )

        refresh_btn.click(
            fn=get_database_status,
            outputs=[status_output]
        )

        # Single step: clear result and refreshed status are shown together.
        clear_btn.click(
            fn=_clear_and_report,
            outputs=[status_output]
        )

        # Example usage
        gr.Markdown("""
        ## πŸ“ How to Use:

        1. **Upload CVs**: Use the file upload component to add multiple PDF CVs to the database
        2. **Enter Job Description**: Paste or type the job requirements you want to match
        3. **Search**: Click search to find the top matching CVs based on semantic similarity
        4. **Review Results**: See ranked CVs with similarity scores and preview text

        ### πŸ’‘ Tips for Better Results:
        - Include specific skills, technologies, and requirements in your job description
        - The more detailed your job description, the better the matching accuracy
        - The system analyzes semantic meaning, not just keyword matching
        - Upload multiple CVs for better comparison and ranking
        """)

    return demo
271
+
272
# Script entry point: build the interface and start the server
if __name__ == "__main__":
    launch_options = {
        # NOTE(review): the original comment says this is for Hugging Face
        # Spaces; confirm a share link is actually wanted there.
        "share": True,
        # Bind on all interfaces so the app is reachable from outside the container
        "server_name": "0.0.0.0",
        # Port conventionally used by Hugging Face Spaces
        "server_port": 7860,
        "show_error": True,
    }

    demo = create_interface()
    demo.launch(**launch_options)