"""Gradio front end for semantic CV search.

CVs are loaded once at startup from a Google Drive folder through
``CVSemanticSearch``; the UI then ranks them against a job description that
is supplied either as pasted text or as an uploaded PDF.
"""

import logging
import os

import gradio as gr

from semantic_search import CVSemanticSearch

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Google Drive Configuration - UPDATE THESE VALUES
FOLDER_ID = "1j1faOlXxoYfPLdzDfGvDbtkENsRoDxXN"  # Replace with your folder ID
# Read from the environment so the key is never committed with the code.
API_KEY = os.getenv("GOOGLE_DRIVE_API_KEY")

# Global variables to store the search system and file mapping
cv_search = None
file_mapping = {}
initialization_status = "Initializing..."

# (minimum percentage, emoji, label), checked from best to worst.
_MATCH_LEVELS = (
    (80, "🟢", "Excellent Match"),
    (65, "🟡", "Good Match"),
    (50, "🟠", "Fair Match"),
)
_WEAKEST_MATCH = ("🔴", "Weak Match")

# Static UI text lives at module level, flush-left, so that the indentation
# of the surrounding code never leaks into the rendered Markdown/CSS.
_CUSTOM_CSS = """
.main-container {
    max-width: 1200px;
    margin: auto;
    padding: 20px;
}
.search-container {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white !important;
    padding: 30px;
    border-radius: 20px;
    margin: 20px 0;
    box-shadow: 0 10px 30px rgba(0,0,0,0.2);
}
.search-container * {
    color: white !important;
}
.status-container {
    background: #f8f9fa !important;
    color: #333 !important;
    padding: 25px;
    border-radius: 15px;
    margin: 20px 0;
    border-left: 5px solid #007bff;
    box-shadow: 0 5px 15px rgba(0,0,0,0.1);
}
.status-container * {
    color: #333 !important;
}
.results-container {
    background: #ffffff !important;
    color: #333 !important;
    padding: 25px;
    border-radius: 15px;
    border: 1px solid #dee2e6;
    margin: 20px 0;
    box-shadow: 0 5px 15px rgba(0,0,0,0.1);
}
.results-container * {
    color: #333 !important;
}
.header {
    text-align: center;
    padding: 30px;
    background: linear-gradient(135deg, #74b9ff, #0984e3);
    color: white !important;
    margin: -20px -20px 20px -20px;
    border-radius: 15px 15px 0 0;
}
.header * {
    color: white !important;
}
.tab-content {
    padding: 15px;
}
.markdown-content {
    background: #fff !important;
    color: #333 !important;
    padding: 20px;
    border-radius: 10px;
}
.markdown-content * {
    color: #333 !important;
}
"""

_HEADER_MD = """
# 🚀 CV Semantic Search System
## AI-Powered Resume Matching
### *Automatically synced with Google Drive*
"""

_JD_PLACEHOLDER = """Paste your job description here...

Example:
Senior Software Engineer Position

Requirements:
• 5+ years of experience in Python, JavaScript, and React
• Strong background in machine learning and AI
• Experience with cloud platforms (AWS, Azure, GCP)
• Knowledge of microservices and API development
• Bachelor's degree in Computer Science or related field
• Excellent problem-solving and communication skills

Responsibilities:
• Design and develop scalable web applications
• Lead technical projects and mentor junior developers
• Collaborate with cross-functional teams
• Implement best practices for code quality and testing"""

_USAGE_MD = """
# 📋 How to Use This System:

1. **Enter Job Requirements**: Use the text box or upload a PDF with your job description
2. **Click Search**: The AI will analyze semantic meaning and find the best matches
3. **Review Results**: See ranked CVs with detailed similarity scores and explanations

## 🎯 What Makes This Special:

- **Semantic Understanding**: Finds relevant CVs even if they don't use exact keywords
- **Automatic Sync**: CVs are always up-to-date from your Google Drive folder
- **Smart Ranking**: Combines multiple similarity metrics for accurate results
- **Detailed Analysis**: Shows why each CV matches your requirements

*Enter a job description above to get started!*
"""

_FOOTER_MD = """
---

# 🛠️ Technical Details

- **Vector Database**: ChromaDB (rebuilt on each restart)
- **Embedding Model**: SentenceTransformers all-MiniLM-L6-v2
- **Text Extraction**: pdfplumber + OCR fallback for scanned documents
- **CV Source**: Google Drive folder (automatically synced)
- **Search Algorithm**: Cosine similarity with chunk aggregation

## 📞 Support

If no results appear, check that:
- Your Google Drive folder is public
- The folder contains PDF files
- Your API key is valid and has Drive API access
"""


def initialize_database():
    """
    Initialize the database by loading CVs from Google Drive folder

    This runs once when the space starts. It updates the module-level
    ``cv_search``, ``file_mapping`` and ``initialization_status``.

    Returns:
        True if at least one CV was loaded, False otherwise (including when
        an exception was raised while loading).
    """
    global cv_search, initialization_status, file_mapping

    if not API_KEY:
        # Do not abort: let the loader report the failure, but make the most
        # likely root cause visible in the logs.
        logger.warning(
            "GOOGLE_DRIVE_API_KEY is not set; loading CVs from Google Drive "
            "will probably fail."
        )

    try:
        logger.info("Initializing CV Semantic Search system...")
        cv_search = CVSemanticSearch()

        logger.info("Loading CVs from Google Drive folder...")
        successful, total, file_map = cv_search.load_cvs_from_google_drive(
            FOLDER_ID, API_KEY
        )
        # Keep file_mapping a dict even if the loader hands back None.
        file_mapping = file_map or {}

        if successful > 0:
            initialization_status = (
                f"✅ Successfully loaded {successful}/{total} CVs into database"
            )
            logger.info(initialization_status)
            return True

        initialization_status = (
            "❌ Failed to load any CVs from Google Drive. "
            "Check API key and folder ID."
        )
        logger.error(initialization_status)
        return False
    except Exception as e:
        # Top-level boundary: record the failure for the status panel and
        # keep the traceback in the log.
        initialization_status = f"❌ Error during initialization: {str(e)}"
        logger.exception(initialization_status)
        return False


def process_job_description(jd_text, jd_file):
    """
    Process job description from either text input or PDF file

    Args:
        jd_text: Job description as text
        jd_file: Job description as PDF file. Either a filesystem path or an
            object exposing the path as ``.name``; may be None.

    Returns:
        Processed job description text ("" when neither input yields text)
    """
    # Priority: PDF file over text input
    if jd_file is not None:
        try:
            # The upload may arrive as a plain path or as a file-like
            # wrapper with a .name attribute; accept both.
            if isinstance(jd_file, (str, os.PathLike)):
                pdf_path = jd_file
            else:
                pdf_path = jd_file.name

            with open(pdf_path, "rb") as f:
                pdf_content = f.read()

            extracted_text = cv_search.extract_text_from_pdf_bytes(pdf_content)
            if extracted_text and extracted_text.strip():
                return extracted_text.strip()
        except Exception as e:
            # Best effort: a broken PDF must not block the text fallback.
            logger.exception("Error processing JD PDF: %s", e)

    # Fallback to text input
    if jd_text and jd_text.strip():
        return jd_text.strip()

    return ""


def _match_quality(similarity_percentage):
    """Return the (emoji, label) pair describing a match percentage."""
    for threshold, emoji, label in _MATCH_LEVELS:
        if similarity_percentage >= threshold:
            return emoji, label
    return _WEAKEST_MATCH


def _format_cv_result(rank, cv):
    """Render one search hit as a Markdown section."""
    similarity_percentage = cv['weighted_score'] * 100
    filename = cv['filename']
    match_emoji, match_text = _match_quality(similarity_percentage)

    # Only emit a Markdown link when a real URL is known; otherwise the
    # result would contain a dead link.
    drive_link = (file_mapping.get(filename) or {}).get('webViewLink')
    if drive_link:
        link_line = f"🔗 **[Open CV in Google Drive]({drive_link})**"
    else:
        link_line = "🔗 **Google Drive link: Not available**"

    max_pct = cv['max_similarity'] * 100
    avg_pct = cv['avg_similarity'] * 100
    best_text = cv['best_match_text']

    lines = [
        f"## {rank}. {filename}",
        "",
        f"**{match_emoji} {match_text}** - "
        f"**{similarity_percentage:.1f}% Overall Match**",
        "",
        "📊 **Detailed Scores:**",
        f"- Best Section Match: {max_pct:.1f}%",
        f"- Average Match: {avg_pct:.1f}%",
        f"- CV Sections Analyzed: {cv['chunk_count']}",
        "",
        "💡 **Why This CV Matches:**",
        f'*"{best_text}"*',
        "",
        link_line,
        "",
        "---",
        "",
        "",
    ]
    return "\n".join(lines)


def search_matching_cvs(jd_text, jd_file, num_results):
    """
    Search for CVs matching the job description

    Args:
        jd_text: Job description as text
        jd_file: Job description as PDF file
        num_results: Number of results to return

    Returns:
        Formatted search results (Markdown), or a Markdown error message
        when the system is not ready or no input/match is available
    """
    if cv_search is None:
        return (
            f"❌ System not initialized properly.\n\n{initialization_status}"
            "\n\nPlease refresh the page or check the configuration."
        )

    # Process job description
    job_description = process_job_description(jd_text, jd_file)
    if not job_description:
        return (
            "❌ Please provide a job description either as text "
            "or upload a PDF file."
        )

    # Get database info
    db_info = cv_search.get_database_info()
    if db_info['unique_cvs'] == 0:
        return f"❌ No CVs in database.\n\n{initialization_status}"

    # Perform search; the slider value is coerced so top_k is always an int.
    results = cv_search.search_cvs(job_description, top_k=int(num_results))
    if not results:
        return (
            "❌ No matching CVs found. Try using different keywords or "
            "requirements in your job description."
        )

    # Format results
    if len(job_description) > 150:
        jd_preview = job_description[:150] + "..."
    else:
        jd_preview = job_description

    sections = [
        f"# 🎯 Top {len(results)} Matching CVs\n\n"
        f"**Job Description**: {jd_preview}\n\n"
        "---\n\n"
    ]
    sections.extend(
        _format_cv_result(rank, cv) for rank, cv in enumerate(results, 1)
    )
    return "".join(sections)


def get_system_status():
    """
    Get current system status

    Returns:
        System information as formatted string (Markdown)
    """
    if cv_search is None:
        return "\n".join([
            "## ⚠️ System Status: Not Ready",
            "",
            f"{initialization_status}",
            "",
            "**Possible Issues:**",
            "- Invalid Google Drive API key",
            "- Incorrect folder ID",
            "- Folder is not public",
            "- No PDF files in the folder",
            "",
        ])

    db_info = cv_search.get_database_info()
    unique_cvs = db_info['unique_cvs']

    if unique_cvs == 0:
        return "\n".join([
            "## ⚠️ System Status: No CVs Loaded",
            "",
            f"{initialization_status}",
            "",
            "**Please Check:**",
            "- Google Drive folder contains PDF files",
            "- Folder is publicly accessible",
            "- API key has proper permissions",
            "",
        ])

    total_chunks = db_info['total_chunks']
    filenames = db_info['cv_filenames']
    sample = ', '.join(filenames[:3]) + ('...' if len(filenames) > 3 else '')

    return "\n".join([
        "## ✅ System Status: Ready for Search",
        "",
        "📊 **Database Statistics:**",
        f"- **CVs Loaded**: {unique_cvs} resumes",
        f"- **Text Chunks**: {total_chunks} searchable segments",
        # unique_cvs is non-zero here, so the division is safe.
        f"- **Avg Chunks per CV**: {total_chunks / unique_cvs:.1f}",
        "",
        "🤖 **AI Model**: Sentence Transformers (all-MiniLM-L6-v2)",
        "",
        f"📁 **Sample CVs**: {sample}",
        "",
    ])


def _clear_file_when_text_entered(text):
    """Return None (clear the upload) for non-blank text, else leave it as is."""
    if text and text.strip():
        return None
    return gr.update()


# Create Gradio interface
def create_interface():
    """Create and return the Gradio interface"""
    with gr.Blocks(
        title="CV Semantic Search - Auto-loaded from Google Drive",
        theme=gr.themes.Soft(),
        css=_CUSTOM_CSS,
    ) as demo:
        with gr.Column(elem_classes=["main-container"]):
            gr.Markdown(_HEADER_MD)

            # System Status Display
            with gr.Row():
                status_display = gr.Markdown(
                    get_system_status(),
                    elem_classes=["status-container", "markdown-content"],
                )

            # Main Search Interface
            with gr.Row():
                with gr.Column():
                    with gr.Group(elem_classes=["search-container"]):
                        gr.Markdown("## 📋 Job Description Input")

                        with gr.Tab("📝 Text Input"):
                            jd_text = gr.Textbox(
                                label="Paste Job Description",
                                placeholder=_JD_PLACEHOLDER,
                                lines=12,
                                max_lines=20,
                                elem_classes=["tab-content"],
                            )

                        with gr.Tab("📄 PDF Upload"):
                            jd_file = gr.File(
                                label="Upload Job Description PDF",
                                file_types=[".pdf"],
                                file_count="single",
                                elem_classes=["tab-content"],
                            )

                        with gr.Row():
                            num_results = gr.Slider(
                                label="Number of Top CVs to Return",
                                minimum=1,
                                maximum=10,
                                value=5,
                                step=1,
                            )

                        search_btn = gr.Button(
                            "🔍 Find Best Matching CVs",
                            variant="primary",
                            size="lg",
                        )

            # Search Results
            with gr.Row():
                search_output = gr.Markdown(
                    _USAGE_MD,
                    elem_classes=["results-container", "markdown-content"],
                )

            # Refresh Status Button
            with gr.Row():
                refresh_btn = gr.Button("🔄 Refresh System Status", size="sm")

            # Event handlers
            search_btn.click(
                fn=search_matching_cvs,
                inputs=[jd_text, jd_file, num_results],
                outputs=[search_output],
            )

            refresh_btn.click(
                fn=get_system_status,
                outputs=[status_display],
            )

            # The two inputs are mutually exclusive. Only user-initiated
            # events are used here: with `.change` on both components each
            # handler's programmatic update re-triggers the other one, which
            # can wipe the input the user has just provided.

            # Clear text input when PDF is uploaded
            jd_file.upload(
                fn=lambda: "",
                outputs=[jd_text],
            )

            # Clear file input when text is entered
            jd_text.input(
                fn=_clear_file_when_text_entered,
                inputs=[jd_text],
                outputs=[jd_file],
            )

            # Footer
            gr.Markdown(_FOOTER_MD, elem_classes=["markdown-content"])

    return demo


def main():
    """Main function to initialize and run the app"""
    logger.info("Starting CV Semantic Search application...")

    # Initialize database at startup; the UI is still launched on failure so
    # the status panel can show what went wrong.
    if initialize_database():
        logger.info("✅ Database initialization successful")
    else:
        logger.error("❌ Database initialization failed")

    # Create and launch interface
    demo = create_interface()
    demo.launch(
        share=True,  # Enable sharing for Hugging Face Spaces
        server_name="0.0.0.0",  # Enable access from outside container
        server_port=7860,  # Standard port for Hugging Face Spaces
        show_error=True,
    )


if __name__ == "__main__":
    main()