import gradio as gr import os from semantic_search import CVSemanticSearch import logging # Set up logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) # Google Drive Configuration - UPDATE THESE VALUES FOLDER_ID = "1j1faOlXxoYfPLdzDfGvDbtkENsRoDxXN" # Replace with your folder ID API_KEY = os.getenv("GOOGLE_DRIVE_API_KEY") # Replace with your API key # Global variables to store the search system and file mapping cv_search = None file_mapping = {} initialization_status = "Initializing..." def initialize_database(): """ Initialize the database by loading CVs from Google Drive folder This runs once when the space starts """ global cv_search, initialization_status, file_mapping try: logger.info("Initializing CV Semantic Search system...") cv_search = CVSemanticSearch() logger.info("Loading CVs from Google Drive folder...") successful, total, file_map = cv_search.load_cvs_from_google_drive(FOLDER_ID, API_KEY) file_mapping = file_map if successful > 0: initialization_status = f"✅ Successfully loaded {successful}/{total} CVs into database" logger.info(initialization_status) return True else: initialization_status = "❌ Failed to load any CVs from Google Drive. Check API key and folder ID." logger.error(initialization_status) return False except Exception as e: initialization_status = f"❌ Error during initialization: {str(e)}" logger.error(initialization_status) return False def process_job_description(jd_text, jd_file): """ Process job description from either text input or PDF file Args: jd_text: Job description as text jd_file: Job description as PDF file Returns: Processed job description text """ # Priority: PDF file over text input if jd_file is not None: try: with open(jd_file.name, 'rb') as f: pdf_content = f.read() extracted_text = cv_search.extract_text_from_pdf_bytes(pdf_content) if extracted_text.strip(): return extracted_text.strip() except Exception as e: logger.error(f"Error processing JD PDF: {str(e)}") # Fallback to text input if jd_text and jd_text.strip(): return jd_text.strip() return "" def search_matching_cvs(jd_text, jd_file, num_results): """ Search for CVs matching the job description Args: jd_text: Job description as text jd_file: Job description as PDF file num_results: Number of results to return Returns: Formatted search results """ global cv_search, file_mapping if cv_search is None: return f"❌ System not initialized properly.\n\n{initialization_status}\n\nPlease refresh the page or check the configuration." # Process job description job_description = process_job_description(jd_text, jd_file) if not job_description: return "❌ Please provide a job description either as text or upload a PDF file." # Get database info db_info = cv_search.get_database_info() if db_info['unique_cvs'] == 0: return f"❌ No CVs in database.\n\n{initialization_status}" # Perform search results = cv_search.search_cvs(job_description, top_k=num_results) if not results: return "❌ No matching CVs found. Try using different keywords or requirements in your job description." # Format results jd_preview = job_description[:150] + "..." if len(job_description) > 150 else job_description output = f"""# 🎯 Top {len(results)} Matching CVs **Job Description**: {jd_preview} --- """ for i, cv in enumerate(results, 1): similarity_percentage = cv['weighted_score'] * 100 filename = cv['filename'] # Get Google Drive link drive_link = "Not available" if filename in file_mapping: drive_link = file_mapping[filename]['webViewLink'] # Determine match quality if similarity_percentage >= 80: match_emoji = "🟢" match_text = "Excellent Match" elif similarity_percentage >= 65: match_emoji = "🟡" match_text = "Good Match" elif similarity_percentage >= 50: match_emoji = "🟠" match_text = "Fair Match" else: match_emoji = "🔴" match_text = "Weak Match" output += f"""## {i}. {filename} **{match_emoji} {match_text}** - **{similarity_percentage:.1f}% Overall Match** 📊 **Detailed Scores:** - Best Section Match: {cv['max_similarity']*100:.1f}% - Average Match: {cv['avg_similarity']*100:.1f}% - CV Sections Analyzed: {cv['chunk_count']} 💡 **Why This CV Matches:** *"{cv['best_match_text']}"* 🔗 **[Open CV in Google Drive]({drive_link})** --- """ return output def get_system_status(): """ Get current system status Returns: System information as formatted string """ global cv_search, initialization_status if cv_search is None: return f""" ## ⚠️ System Status: Not Ready {initialization_status} **Possible Issues:** - Invalid Google Drive API key - Incorrect folder ID - Folder is not public - No PDF files in the folder """ db_info = cv_search.get_database_info() if db_info['unique_cvs'] == 0: return f""" ## ⚠️ System Status: No CVs Loaded {initialization_status} **Please Check:** - Google Drive folder contains PDF files - Folder is publicly accessible - API key has proper permissions """ return f""" ## ✅ System Status: Ready for Search 📊 **Database Statistics:** - **CVs Loaded**: {db_info['unique_cvs']} resumes - **Text Chunks**: {db_info['total_chunks']} searchable segments - **Avg Chunks per CV**: {db_info['total_chunks'] / db_info['unique_cvs']:.1f} 🤖 **AI Model**: Sentence Transformers (all-MiniLM-L6-v2) 📁 **Sample CVs**: {', '.join(db_info['cv_filenames'][:3])}{'...' if len(db_info['cv_filenames']) > 3 else ''} """ # Create Gradio interface def create_interface(): """Create and return the Gradio interface""" with gr.Blocks( title="CV Semantic Search - Auto-loaded from Google Drive", theme=gr.themes.Soft(), css=""" .main-container { max-width: 1200px; margin: auto; padding: 20px; } .search-container { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white !important; padding: 30px; border-radius: 20px; margin: 20px 0; box-shadow: 0 10px 30px rgba(0,0,0,0.2); } .search-container * { color: white !important; } .status-container { background: #f8f9fa !important; color: #333 !important; padding: 25px; border-radius: 15px; margin: 20px 0; border-left: 5px solid #007bff; box-shadow: 0 5px 15px rgba(0,0,0,0.1); } .status-container * { color: #333 !important; } .results-container { background: #ffffff !important; color: #333 !important; padding: 25px; border-radius: 15px; border: 1px solid #dee2e6; margin: 20px 0; box-shadow: 0 5px 15px rgba(0,0,0,0.1); } .results-container * { color: #333 !important; } .header { text-align: center; padding: 30px; background: linear-gradient(135deg, #74b9ff, #0984e3); color: white !important; margin: -20px -20px 20px -20px; border-radius: 15px 15px 0 0; } .header * { color: white !important; } .tab-content { padding: 15px; } .markdown-content { background: #fff !important; color: #333 !important; padding: 20px; border-radius: 10px; } .markdown-content * { color: #333 !important; } """ ) as demo: with gr.Column(elem_classes=["main-container"]): gr.Markdown("""