Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| import plotly.graph_objects as go | |
| import plotly.express as px | |
| from pathlib import Path | |
| import pickle | |
| import gdown | |
| import zipfile | |
| import os | |
# Page config: browser-tab title and icon, wide layout, sidebar open on load.
st.set_page_config(
    page_title="Skills-Based Job Matching System",
    # Briefcase emoji; the previous value was a mis-decoded UTF-8 sequence.
    page_icon="💼",
    layout="wide",
    initial_sidebar_state="expanded",
)
# Custom CSS (your existing CSS here)
# Styles for the page header/sub-header, the skill badges (matched vs. missing)
# and the match-score figure. Injected as raw HTML, hence unsafe_allow_html.
_CUSTOM_CSS = """
<style>
.main-header {
font-size: 2.5rem;
color: #1e40af;
font-weight: bold;
margin-bottom: 0.5rem;
}
.sub-header {
font-size: 1.2rem;
color: #64748b;
margin-bottom: 2rem;
}
.skill-badge {
display: inline-block;
padding: 0.25rem 0.75rem;
margin: 0.25rem;
border-radius: 1rem;
font-size: 0.875rem;
font-weight: 500;
}
.skill-match {
background-color: #dcfce7;
color: #166534;
}
.skill-missing {
background-color: #fee2e2;
color: #991b1b;
}
.match-score {
font-size: 2rem;
font-weight: bold;
color: #1e40af;
}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# ============================================
# GOOGLE DRIVE SETUP
# ============================================
def download_from_gdrive():
    """Download and extract the project data archive from Google Drive.

    The download is skipped when ``data/processed_jobs.parquet`` and the
    ``models`` directory already exist relative to the current working
    directory. Otherwise the archive is fetched with ``gdown``, extracted
    into the current directory and then deleted.

    Returns:
        tuple[bool, str]: ``(success, message)``. ``success`` is ``False``
        when gdown reports a failed download or any exception is raised
        while downloading or extracting; ``message`` is a user-facing
        description of the outcome.
    """
    # REPLACE THIS with your Google Drive file ID
    GDRIVE_FILE_ID = "1mUUvKpFX1usIpLu-dSiYc-F6Zogfw95o"
    zip_path = "project_data.zip"

    # Fast path: a previous run already extracted everything we need.
    data_exists = (
        Path("data/processed_jobs.parquet").exists()
        and Path("models").exists()
    )
    if data_exists:
        return True, "Data already loaded"

    try:
        st.info("📥 Downloading data from Google Drive (1.2GB)...")
        st.info("This is a one-time download and will be cached. Please wait...")
        url = f"https://drive.google.com/uc?id={GDRIVE_FILE_ID}"

        # gdown.download is called without a progress callback here, so the
        # bar can only show "not started" (0) and "finished" (100).
        progress_bar = st.progress(0)
        status_text = st.empty()

        output = gdown.download(url, zip_path, quiet=False)
        if output is None:
            return False, "Failed to download. Please check if Google Drive link is public."
        progress_bar.progress(100)
        status_text.text("Download complete!")

        st.info("📦 Extracting files...")
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            members = zip_ref.infolist()
            total_files = len(members)
            extract_progress = st.progress(0)
            # Extract member by member so the progress bar can advance.
            for done, member in enumerate(members, start=1):
                zip_ref.extract(member, ".")
                extract_progress.progress(done / total_files)

        return True, "✅ Data downloaded and ready!"
    except Exception as e:
        # Top-level boundary: surface every failure to the UI as a message
        # instead of crashing the app.
        return False, f"Error: {str(e)}\n\nPlease ensure:\n1. Google Drive link is set to 'Anyone with the link'\n2. File ID is correct\n3. File is not corrupted"
    finally:
        # Best-effort cleanup on success AND failure, so a corrupt or
        # partially processed archive is never left on disk. A missing file
        # (download never started) is not an error.
        try:
            os.remove(zip_path)
        except OSError:
            pass
# Download data first. Nothing below can work without the extracted data,
# so on failure show the reason plus setup instructions and halt the script.
download_success, download_message = download_from_gdrive()
if not download_success:
    st.error("❌ Failed to load data")
    st.error(download_message)
    st.info("""
**Setup Instructions:**
1. Upload your 1.2GB zip file to Google Drive
2. Right-click → Get link → Set to "Anyone with the link"
3. Copy the file ID from the link
4. Update GDRIVE_FILE_ID in the code
Example link: `https://drive.google.com/file/d/1a2b3c4d5e6f7g8h9i0j/view`
File ID: `1a2b3c4d5e6f7g8h9i0j`
""")
    st.stop()
else:
    st.success(download_message)
# ============================================
# YOUR EXISTING CODE CONTINUES HERE
# ============================================
# Initialize session state: each key is seeded only when it is absent, so
# values already stored in the session are never overwritten.
_SESSION_DEFAULTS = {
    'user_profile': {
        'skills': [],
        'experience_level': 2,
        'min_salary': 0,
        'city': '',
        'state': '',
        'remote_only': False,
        'company_size': -1,
        'benefits': [],
    },
    'search_results': None,
    'selected_job': None,
}
for _state_key, _initial_value in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
# Import your modules (make sure they're in the zip).
# They are imported here rather than at the top of the file, after the
# download step has run; if either import fails, report it and halt.
try:
    from skill_extractor import SkillExtractor
    from ranking_pipeline import RankingPipeline
except ImportError as import_error:
    st.error(f"Error importing modules: {import_error}")
    st.info("Make sure skill_extractor.py and ranking_pipeline.py are in your zip file")
    st.stop()
# Cache data loading: without the decorator the parquet file and all models
# would be reloaded on every script run.
@st.cache_resource
def load_models_and_data():
    """Load the processed jobs table, the ranking pipeline and the skill extractor.

    Reads ``data/processed_jobs.parquet``, builds a ``RankingPipeline`` over
    the ``models`` directory, attaches the jobs table to it, loads its
    indices and learning-to-rank model, and creates a ``SkillExtractor``.

    Returns:
        tuple: ``(pipeline, jobs_df, skill_extractor)``.

    Raises:
        Exception: any error raised while loading is shown via ``st.error``
        and then re-raised unchanged for the caller to handle.
    """
    try:
        # Load processed data
        jobs_df = pd.read_parquet('data/processed_jobs.parquet')

        # Initialize pipeline; jobs_df is attached before the indices and
        # LTR model are loaded, matching the original call order.
        pipeline = RankingPipeline(model_dir='models')
        pipeline.jobs_df = jobs_df
        pipeline.load_indices()
        pipeline.load_ltr_model()

        # Load skill extractor
        skill_extractor = SkillExtractor()
        return pipeline, jobs_df, skill_extractor
    except Exception as e:
        st.error(f"Error loading models: {e}")
        # Bare raise keeps the original traceback intact.
        raise
# Load everything once at startup; on failure show the expected archive
# layout and halt, since the rest of the app needs these objects.
try:
    pipeline, jobs_df, skill_extractor = load_models_and_data()
    data_loaded = True
except Exception as e:
    st.error(f"Error loading data: {e}")
    st.info("Please check that your zip file contains:")
    st.code("""
├── data/
│   └── processed_jobs.parquet
├── models/
│   └── (your model files)
├── skill_extractor.py
└── ranking_pipeline.py
""")
    data_loaded = False
    st.stop()
# ============================================
# REST OF YOUR EXISTING APP CODE
# ============================================
# Sidebar - User Profile
with st.sidebar:
    st.markdown("### 👤 User Profile")

    # Skills input: options come from the skill extractor, the current
    # selection from the stored profile; the new selection is written back.
    st.markdown("#### Skills")
    all_skills = skill_extractor.get_all_skills() if data_loaded else []
    selected_skills = st.multiselect(
        "Select your skills",
        options=all_skills,
        default=st.session_state.user_profile['skills'],
        help="Start typing to search skills",
    )
    st.session_state.user_profile['skills'] = selected_skills
    # ... (rest of your sidebar code)
# Main content: page title and tagline, styled by the .main-header and
# .sub-header CSS classes (raw HTML, hence unsafe_allow_html).
st.markdown('<h1 class="main-header">💼 Skills-Based Job Matching System</h1>', unsafe_allow_html=True)
st.markdown('<p class="sub-header">Find your perfect job match using AI-powered ranking</p>', unsafe_allow_html=True)
# ... (rest of your existing code)