Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import os | |
| from semantic_search import CVSemanticSearch | |
| import logging | |
# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Google Drive configuration.
# The folder ID may be overridden with the GOOGLE_DRIVE_FOLDER_ID environment
# variable; when it is unset or empty the ID below is used.
FOLDER_ID = os.getenv("GOOGLE_DRIVE_FOLDER_ID") or "1j1faOlXxoYfPLdzDfGvDbtkENsRoDxXN"
# The API key is read from the environment and is None when not configured.
API_KEY = os.getenv("GOOGLE_DRIVE_API_KEY")

# Module-level state shared by the handlers below:
#   cv_search             - the CVSemanticSearch instance (None until initialized)
#   file_mapping          - per-filename metadata returned by the Drive loader
#   initialization_status - human-readable outcome of the startup load
cv_search = None
file_mapping = {}
initialization_status = "Initializing..."
def initialize_database():
    """
    Initialize the database by loading CVs from the Google Drive folder.

    Creates the ``CVSemanticSearch`` instance, calls
    ``load_cvs_from_google_drive(FOLDER_ID, API_KEY)`` on it, and records the
    outcome in the module-level ``initialization_status``. The file mapping
    returned by the loader is stored in ``file_mapping``.

    Returns:
        True if at least one CV was loaded; False otherwise. Exceptions are
        logged (with traceback) and reported as False rather than propagated.
    """
    global cv_search, initialization_status, file_mapping
    try:
        logger.info("Initializing CV Semantic Search system...")
        cv_search = CVSemanticSearch()

        if not API_KEY:
            # Still attempt the load, but make the likely cause visible early.
            logger.warning("GOOGLE_DRIVE_API_KEY environment variable is not set")

        logger.info("Loading CVs from Google Drive folder...")
        successful, total, file_map = cv_search.load_cvs_from_google_drive(FOLDER_ID, API_KEY)
        # Never replace the mapping with None; lookups elsewhere expect a dict.
        file_mapping = file_map or {}

        if successful > 0:
            initialization_status = f"β Successfully loaded {successful}/{total} CVs into database"
            logger.info(initialization_status)
            return True

        initialization_status = "β Failed to load any CVs from Google Drive. Check API key and folder ID."
        if not API_KEY:
            initialization_status += " The GOOGLE_DRIVE_API_KEY environment variable is not set."
        logger.error(initialization_status)
        return False
    except Exception as e:
        initialization_status = f"β Error during initialization: {str(e)}"
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception(initialization_status)
        return False
def process_job_description(jd_text, jd_file):
    """
    Process job description from either text input or PDF file.

    The PDF takes priority: if a file is supplied and text can be extracted
    from it, that text is returned. Otherwise the pasted text is used.

    Args:
        jd_text: Job description as text (may be None or empty).
        jd_file: Uploaded job description PDF, or None. Accepts either a file
            path string or an object exposing the path as ``.name``.

    Returns:
        The stripped job description text, or "" when neither source yields
        any text.
    """
    # Priority: PDF file over text input
    if jd_file is not None:
        # Depending on the Gradio version/configuration the upload arrives as
        # a plain path string or as a tempfile-like object with `.name`.
        pdf_path = getattr(jd_file, "name", jd_file)
        try:
            with open(pdf_path, "rb") as f:
                pdf_content = f.read()
            extracted_text = cv_search.extract_text_from_pdf_bytes(pdf_content)
            cleaned = (extracted_text or "").strip()
            if cleaned:
                return cleaned
            logger.warning("No text could be extracted from the JD PDF; falling back to text input")
        except Exception as e:
            # Best effort: a broken PDF must not block the text fallback.
            logger.exception("Error processing JD PDF: %s", e)

    # Fallback to text input
    if jd_text and jd_text.strip():
        return jd_text.strip()
    return ""
def _match_quality(similarity_percentage):
    """Map a 0-100 overall score to the (emoji, label) pair shown in results."""
    if similarity_percentage >= 80:
        return "π’", "Excellent Match"
    if similarity_percentage >= 65:
        return "π‘", "Good Match"
    if similarity_percentage >= 50:
        return "π ", "Fair Match"
    return "π΄", "Weak Match"


def search_matching_cvs(jd_text, jd_file, num_results):
    """
    Search for CVs matching the job description.

    Args:
        jd_text: Job description as text.
        jd_file: Job description as PDF file.
        num_results: Number of results to return (coerced to int, since a
            UI slider may deliver a float).

    Returns:
        Markdown-formatted search results, or a Markdown error message when
        the system is not initialized, no job description was supplied, the
        database is empty, or nothing matched.
    """
    if cv_search is None:
        return f"β System not initialized properly.\n\n{initialization_status}\n\nPlease refresh the page or check the configuration."

    # Process job description
    job_description = process_job_description(jd_text, jd_file)
    if not job_description:
        return "β Please provide a job description either as text or upload a PDF file."

    # Get database info
    db_info = cv_search.get_database_info()
    if db_info['unique_cvs'] == 0:
        return f"β No CVs in database.\n\n{initialization_status}"

    # Perform search
    results = cv_search.search_cvs(job_description, top_k=int(num_results))
    if not results:
        return "β No matching CVs found. Try using different keywords or requirements in your job description."

    # Format results
    jd_preview = job_description[:150] + "..." if len(job_description) > 150 else job_description
    parts = [
        f"# π― Top {len(results)} Matching CVs\n"
        f"**Job Description**: {jd_preview}\n"
        "---\n"
    ]

    for i, cv in enumerate(results, 1):
        similarity_percentage = cv['weighted_score'] * 100
        filename = cv['filename']
        match_emoji, match_text = _match_quality(similarity_percentage)

        # Only emit a Markdown link when a real URL is known; the placeholder
        # text used as a link target would render as a broken link.
        drive_link = (file_mapping.get(filename) or {}).get('webViewLink')
        if drive_link:
            link_line = f"π **[Open CV in Google Drive]({drive_link})**\n"
        else:
            link_line = "π **Google Drive link not available**\n"

        parts.append(
            f"## {i}. {filename}\n"
            f"**{match_emoji} {match_text}** - **{similarity_percentage:.1f}% Overall Match**\n"
            "π **Detailed Scores:**\n"
            f"- Best Section Match: {cv['max_similarity']*100:.1f}%\n"
            f"- Average Match: {cv['avg_similarity']*100:.1f}%\n"
            f"- CV Sections Analyzed: {cv['chunk_count']}\n"
            "π‘ **Why This CV Matches:**\n"
            f'*"{cv["best_match_text"]}"*\n'
            + link_line
            + "---\n"
        )

    return "".join(parts)
def get_system_status():
    """
    Report the current state of the search system.

    Returns:
        A Markdown string: a "Not Ready" notice when the search object has not
        been created, a "No CVs Loaded" notice when the database reports zero
        CVs, and otherwise a summary of the database statistics.
    """
    if cv_search is None:
        return "\n".join([
            "",
            "## β οΈ System Status: Not Ready",
            f"{initialization_status}",
            "**Possible Issues:**",
            "- Invalid Google Drive API key",
            "- Incorrect folder ID",
            "- Folder is not public",
            "- No PDF files in the folder",
            "",
        ])

    info = cv_search.get_database_info()
    cv_count = info['unique_cvs']

    if cv_count == 0:
        return "\n".join([
            "",
            "## β οΈ System Status: No CVs Loaded",
            f"{initialization_status}",
            "**Please Check:**",
            "- Google Drive folder contains PDF files",
            "- Folder is publicly accessible",
            "- API key has proper permissions",
            "",
        ])

    chunk_count = info['total_chunks']
    names = info['cv_filenames']
    # Show at most three file names, with an ellipsis when more exist.
    sample = ', '.join(names[:3])
    more = '...' if len(names) > 3 else ''

    return "\n".join([
        "",
        "## β System Status: Ready for Search",
        "π **Database Statistics:**",
        f"- **CVs Loaded**: {cv_count} resumes",
        f"- **Text Chunks**: {chunk_count} searchable segments",
        f"- **Avg Chunks per CV**: {chunk_count / cv_count:.1f}",
        "π€ **AI Model**: Sentence Transformers (all-MiniLM-L6-v2)",
        f"π **Sample CVs**: {sample}{more}",
        "",
    ])
# Static assets for the Gradio interface, kept at module level so that
# create_interface() reads as layout + wiring only.
_APP_CSS = """
.main-container {
max-width: 1200px;
margin: auto;
padding: 20px;
}
.search-container {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white !important;
padding: 30px;
border-radius: 20px;
margin: 20px 0;
box-shadow: 0 10px 30px rgba(0,0,0,0.2);
}
.search-container * {
color: white !important;
}
.status-container {
background: #f8f9fa !important;
color: #333 !important;
padding: 25px;
border-radius: 15px;
margin: 20px 0;
border-left: 5px solid #007bff;
box-shadow: 0 5px 15px rgba(0,0,0,0.1);
}
.status-container * {
color: #333 !important;
}
.results-container {
background: #ffffff !important;
color: #333 !important;
padding: 25px;
border-radius: 15px;
border: 1px solid #dee2e6;
margin: 20px 0;
box-shadow: 0 5px 15px rgba(0,0,0,0.1);
}
.results-container * {
color: #333 !important;
}
.header {
text-align: center;
padding: 30px;
background: linear-gradient(135deg, #74b9ff, #0984e3);
color: white !important;
margin: -20px -20px 20px -20px;
border-radius: 15px 15px 0 0;
}
.header * {
color: white !important;
}
.tab-content {
padding: 15px;
}
.markdown-content {
background: #fff !important;
color: #333 !important;
padding: 20px;
border-radius: 10px;
}
.markdown-content * {
color: #333 !important;
}
"""

_HEADER_MD = """
<div class="header">
# π CV Semantic Search System
## AI-Powered Resume Matching
### *Automatically synced with Google Drive*
</div>
"""

_JD_PLACEHOLDER = """Paste your job description here...
Example:
Senior Software Engineer Position
Requirements:
β’ 5+ years of experience in Python, JavaScript, and React
β’ Strong background in machine learning and AI
β’ Experience with cloud platforms (AWS, Azure, GCP)
β’ Knowledge of microservices and API development
β’ Bachelor's degree in Computer Science or related field
β’ Excellent problem-solving and communication skills
Responsibilities:
β’ Design and develop scalable web applications
β’ Lead technical projects and mentor junior developers
β’ Collaborate with cross-functional teams
β’ Implement best practices for code quality and testing"""

_USAGE_MD = """
# π How to Use This System:
1. **Enter Job Requirements**: Use the text box or upload a PDF with your job description
2. **Click Search**: The AI will analyze semantic meaning and find the best matches
3. **Review Results**: See ranked CVs with detailed similarity scores and explanations
## π― What Makes This Special:
- **Semantic Understanding**: Finds relevant CVs even if they don't use exact keywords
- **Automatic Sync**: CVs are always up-to-date from your Google Drive folder
- **Smart Ranking**: Combines multiple similarity metrics for accurate results
- **Detailed Analysis**: Shows why each CV matches your requirements
*Enter a job description above to get started!*
"""

_FOOTER_MD = """
---
# π οΈ Technical Details
- **Vector Database**: ChromaDB (rebuilt on each restart)
- **Embedding Model**: SentenceTransformers all-MiniLM-L6-v2
- **Text Extraction**: pdfplumber + OCR fallback for scanned documents
- **CV Source**: Google Drive folder (automatically synced)
- **Search Algorithm**: Cosine similarity with chunk aggregation
## π Support
If no results appear, check that:
- Your Google Drive folder is public
- The folder contains PDF files
- Your API key is valid and has Drive API access
"""


def _clear_file_on_text(text):
    """Clear the uploaded file once the user has typed real text; else no-op."""
    if text and text.strip():
        return None  # None resets the File component
    return gr.update()  # leave the File component untouched


# Create Gradio interface
def create_interface():
    """
    Create and return the Gradio interface.

    Builds the layout (header, status panel, job-description input with text
    and PDF tabs, results panel, refresh button, footer) and wires the event
    handlers to ``search_matching_cvs`` and ``get_system_status``.

    Returns:
        The constructed ``gr.Blocks`` app (not yet launched).
    """
    # NOTE(review): the widget nesting below was reconstructed from a source
    # whose indentation was lost - confirm it matches the intended layout.
    with gr.Blocks(
        title="CV Semantic Search - Auto-loaded from Google Drive",
        theme=gr.themes.Soft(),
        css=_APP_CSS,
    ) as demo:
        with gr.Column(elem_classes=["main-container"]):
            gr.Markdown(_HEADER_MD)

            # System Status Display (evaluated once, when the UI is built)
            with gr.Row():
                status_display = gr.Markdown(
                    get_system_status(),
                    elem_classes=["status-container", "markdown-content"],
                )

            # Main Search Interface
            with gr.Row():
                with gr.Column():
                    with gr.Group(elem_classes=["search-container"]):
                        gr.Markdown("## π Job Description Input")
                        with gr.Tab("π Text Input"):
                            jd_text = gr.Textbox(
                                label="Paste Job Description",
                                placeholder=_JD_PLACEHOLDER,
                                lines=12,
                                max_lines=20,
                                elem_classes=["tab-content"],
                            )
                        with gr.Tab("π PDF Upload"):
                            jd_file = gr.File(
                                label="Upload Job Description PDF",
                                file_types=[".pdf"],
                                file_count="single",
                                elem_classes=["tab-content"],
                            )
                        with gr.Row():
                            num_results = gr.Slider(
                                label="Number of Top CVs to Return",
                                minimum=1,
                                maximum=10,
                                value=5,
                                step=1,
                            )
                        search_btn = gr.Button(
                            "π Find Best Matching CVs",
                            variant="primary",
                            size="lg",
                        )

            # Search Results
            with gr.Row():
                search_output = gr.Markdown(
                    _USAGE_MD,
                    elem_classes=["results-container", "markdown-content"],
                )

            # Refresh Status Button
            with gr.Row():
                refresh_btn = gr.Button("π Refresh System Status", size="sm")

            # Event handlers
            search_btn.click(
                fn=search_matching_cvs,
                inputs=[jd_text, jd_file, num_results],
                outputs=[search_output],
            )
            refresh_btn.click(
                fn=get_system_status,
                outputs=[status_display],
            )

            # The two inputs clear each other. This must react to *user*
            # actions only: `.change` also fires on programmatic updates, so
            # clearing the text after an upload would in turn clear the file
            # that was just uploaded.
            # Clear text input when a PDF is uploaded by the user
            jd_file.upload(
                fn=lambda: "",
                outputs=[jd_text],
            )
            # Clear file input when the user types non-blank text
            jd_text.input(
                fn=_clear_file_on_text,
                inputs=[jd_text],
                outputs=[jd_file],
            )

            # Footer
            gr.Markdown(_FOOTER_MD, elem_classes=["markdown-content"])
    return demo
def main():
    """
    Initialize the database and run the app.

    Loads the CVs once at startup (a failure is logged but does not prevent
    the UI from starting, so the status panel can report the problem), then
    builds and launches the Gradio interface on 0.0.0.0:7860.
    """
    logger.info("Starting CV Semantic Search application...")

    # Initialize database at startup
    if initialize_database():
        logger.info("β Database initialization successful")
    else:
        logger.error("β Database initialization failed")

    # Share links are not supported on Hugging Face Spaces, which already
    # serves the app publicly; only request one when running elsewhere.
    # SPACE_ID is set by the Spaces runtime.
    running_on_spaces = bool(os.getenv("SPACE_ID"))

    # Create and launch interface
    demo = create_interface()
    demo.launch(
        share=not running_on_spaces,
        server_name="0.0.0.0",  # Enable access from outside container
        server_port=7860,  # Standard port for Hugging Face Spaces
        show_error=True,
    )


if __name__ == "__main__":
    main()