# ir_project/src/streamlit_app.py
# Last change: "Update src/streamlit_app.py" (commit dd26653, author sdmadhav)
import streamlit as st
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from pathlib import Path
import pickle
import gdown
import zipfile
import os
# Page config: must run before any other Streamlit command renders output.
st.set_page_config(
    page_title="Skills-Based Job Matching System",
    # Briefcase emoji (U+1F4BC); the previous literal was mis-decoded UTF-8.
    page_icon="💼",
    layout="wide",
    initial_sidebar_state="expanded",
)
# Page-wide stylesheet: header typography, skill badges (matched / missing)
# and the large match-score figure. Injected as raw HTML.
_custom_css = """
<style>
.main-header {
font-size: 2.5rem;
color: #1e40af;
font-weight: bold;
margin-bottom: 0.5rem;
}
.sub-header {
font-size: 1.2rem;
color: #64748b;
margin-bottom: 2rem;
}
.skill-badge {
display: inline-block;
padding: 0.25rem 0.75rem;
margin: 0.25rem;
border-radius: 1rem;
font-size: 0.875rem;
font-weight: 500;
}
.skill-match {
background-color: #dcfce7;
color: #166534;
}
.skill-missing {
background-color: #fee2e2;
color: #991b1b;
}
.match-score {
font-size: 2rem;
font-weight: bold;
color: #1e40af;
}
</style>
"""
st.markdown(_custom_css, unsafe_allow_html=True)
# ============================================
# GOOGLE DRIVE SETUP
# ============================================
@st.cache_resource
def download_from_gdrive():
    """Download and extract the project data archive from Google Drive.

    The download is skipped when ``data/processed_jobs.parquet`` and the
    ``models`` directory already exist in the working directory.

    Returns:
        tuple[bool, str]: ``(success, message)``. On failure the message
        describes the error and lists the things to check.

    Note:
        ``st.cache_resource`` caches whatever this function returns, and that
        includes a failure tuple. Call ``download_from_gdrive.clear()`` to
        force a fresh attempt.
    """
    # REPLACE THIS with your Google Drive file ID
    GDRIVE_FILE_ID = "1mUUvKpFX1usIpLu-dSiYc-F6Zogfw95o"
    # Temporary location of the downloaded archive
    zip_path = "project_data.zip"

    # Check if data already exists
    data_exists = (
        Path("data/processed_jobs.parquet").exists()
        and Path("models").exists()
    )
    if data_exists:
        return True, "Data already loaded"

    try:
        # Download from Google Drive
        st.info("📥 Downloading data from Google Drive (1.2GB)...")
        st.info("This is a one-time download and will be cached. Please wait...")
        url = f"https://drive.google.com/uc?id={GDRIVE_FILE_ID}"

        # The call below reports no incremental progress to the UI, so the
        # bar only moves from 0 to 100 once the download has finished.
        progress_bar = st.progress(0)
        status_text = st.empty()

        output = gdown.download(url, zip_path, quiet=False)
        if output is None:
            return False, "Failed to download. Please check if Google Drive link is public."
        progress_bar.progress(100)
        status_text.text("Download complete!")

        # Extract zip file member by member so progress can be shown
        st.info("📦 Extracting files...")
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            members = zip_ref.infolist()
            total_files = len(members)
            extract_progress = st.progress(0)
            for done, member in enumerate(members, start=1):
                zip_ref.extract(member, ".")
                extract_progress.progress(done / total_files)

        return True, "✓ Data downloaded and ready!"
    except Exception as e:
        return False, f"Error: {str(e)}\n\nPlease ensure:\n1. Google Drive link is set to 'Anyone with the link'\n2. File ID is correct\n3. File is not corrupted"
    finally:
        # Clean up on success AND on failure, so a partial or corrupt archive
        # is never left behind. Best effort: a failed delete must not mask
        # the result being returned.
        try:
            if os.path.exists(zip_path):
                os.remove(zip_path)
        except OSError:
            pass
# Download data first
download_success, download_message = download_from_gdrive()
if not download_success:
    # The cached function has memoised this failure result. Drop it so the
    # next rerun retries the download instead of replaying the same error.
    download_from_gdrive.clear()
    st.error("❌ Failed to load data")
    st.error(download_message)
    st.info("""
**Setup Instructions:**
1. Upload your 1.2GB zip file to Google Drive
2. Right-click → Get link → Set to "Anyone with the link"
3. Copy the file ID from the link
4. Update GDRIVE_FILE_ID in the code
Example link: `https://drive.google.com/file/d/1a2b3c4d5e6f7g8h9i0j/view`
File ID: `1a2b3c4d5e6f7g8h9i0j`
""")
    # Halt this script run; nothing below can work without the data.
    st.stop()
else:
    st.success(download_message)
# ============================================
# YOUR EXISTING CODE CONTINUES HERE
# ============================================
# Seed per-session defaults. Keys that already exist in the session are left
# untouched so user input survives Streamlit reruns.
_session_defaults = {
    'user_profile': {
        'skills': [],
        'experience_level': 2,
        'min_salary': 0,
        'city': '',
        'state': '',
        'remote_only': False,
        'company_size': -1,
        'benefits': [],
    },
    'search_results': None,
    'selected_job': None,
}
for _state_key, _initial_value in _session_defaults.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
# Import your modules (make sure they're in the zip)
import sys

# The archive is extracted into the working directory, which is not
# guaranteed to be on the import path when the script lives in a sub-folder
# and is launched via `streamlit run`. Appending (rather than prepending)
# avoids shadowing anything that is already importable.
_working_dir = os.getcwd()
if _working_dir not in sys.path:
    sys.path.append(_working_dir)

# NOTE(review): these modules come from the downloaded archive, and importing
# them executes their code. Only point the app at an archive you trust.
try:
    from skill_extractor import SkillExtractor
    from ranking_pipeline import RankingPipeline
except ImportError as e:
    st.error(f"Error importing modules: {e}")
    st.info("Make sure skill_extractor.py and ranking_pipeline.py are in your zip file")
    st.stop()
# Cache data loading
@st.cache_resource
def load_models_and_data():
    """Load the jobs table, the ranking pipeline and the skill extractor.

    Reads ``data/processed_jobs.parquet``, builds a ``RankingPipeline`` over
    the ``models`` directory, attaches the jobs table to it, then loads its
    indices and learning-to-rank model.

    Returns:
        tuple: ``(pipeline, jobs_df, skill_extractor)``.

    Raises:
        Exception: any error raised while loading is shown with ``st.error``
            and then re-raised unchanged for the caller to handle.
    """
    try:
        # Load processed data
        jobs_df = pd.read_parquet('data/processed_jobs.parquet')

        # Initialize pipeline
        pipeline = RankingPipeline(model_dir='models')
        pipeline.jobs_df = jobs_df
        pipeline.load_indices()
        pipeline.load_ltr_model()

        # Load skill extractor
        skill_extractor = SkillExtractor()
        return pipeline, jobs_df, skill_extractor
    except Exception as e:
        st.error(f"Error loading models: {e}")
        # A bare `raise` re-raises the active exception with its original
        # traceback intact.
        raise
# Load everything up front; without the models the rest of the page cannot
# render, so any failure stops the script run after explaining what is needed.
try:
    pipeline, jobs_df, skill_extractor = load_models_and_data()
except Exception as e:
    st.error(f"Error loading data: {e}")
    st.info("Please check that your zip file contains:")
    st.code("""
├── data/
│   └── processed_jobs.parquet
├── models/
│   └── (your model files)
├── skill_extractor.py
└── ranking_pipeline.py
""")
    data_loaded = False
    st.stop()
else:
    data_loaded = True
# ============================================
# REST OF YOUR EXISTING APP CODE
# ============================================
# Sidebar - User Profile
with st.sidebar:
    st.markdown("### 👤 User Profile")

    # Skills input: options come from the skill extractor, and the current
    # selection is written back to the session profile on every run.
    st.markdown("#### Skills")
    all_skills = skill_extractor.get_all_skills() if data_loaded else []
    selected_skills = st.multiselect(
        "Select your skills",
        options=all_skills,
        default=st.session_state.user_profile['skills'],
        help="Start typing to search skills",
    )
    st.session_state.user_profile['skills'] = selected_skills
    # ... (rest of your sidebar code)
# Main content: page title and tagline, styled by the .main-header and
# .sub-header CSS classes.
st.markdown('<h1 class="main-header">💼 Skills-Based Job Matching System</h1>', unsafe_allow_html=True)
st.markdown('<p class="sub-header">Find your perfect job match using AI-powered ranking</p>', unsafe_allow_html=True)
# ... (rest of your existing code)