Spaces:

sdmadhav
/

ir_project

Sleeping

App Files Files Community

ir_project / src /streamlit_app.py

sdmadhav

Update src/streamlit_app.py

dd26653 verified 7 months ago

raw

history blame contribute delete

7.16 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	import plotly.graph_objects as go
	import plotly.express as px
	from pathlib import Path
	import pickle
	import gdown
	import zipfile
	import os

	# Page config: sets the page title and icon, selects the wide layout and
	# starts with the sidebar expanded.
	st.set_page_config(
	page_title="Skills-Based Job Matching System",
	page_icon="💼",
	layout="wide",
	initial_sidebar_state="expanded"
	)

	# Custom CSS, injected through st.markdown with unsafe_allow_html=True.
	# It styles the page header and sub-header, the skill badges (green for
	# matched skills, red for missing ones) and the match score.
	st.markdown("""
	<style>
	.main-header {
	font-size: 2.5rem;
	color: #1e40af;
	font-weight: bold;
	margin-bottom: 0.5rem;
	}
	.sub-header {
	font-size: 1.2rem;
	color: #64748b;
	margin-bottom: 2rem;
	}
	.skill-badge {
	display: inline-block;
	padding: 0.25rem 0.75rem;
	margin: 0.25rem;
	border-radius: 1rem;
	font-size: 0.875rem;
	font-weight: 500;
	}
	.skill-match {
	background-color: #dcfce7;
	color: #166534;
	}
	.skill-missing {
	background-color: #fee2e2;
	color: #991b1b;
	}
	.match-score {
	font-size: 2rem;
	font-weight: bold;
	color: #1e40af;
	}
	</style>
	""", unsafe_allow_html=True)

	# ============================================
	# GOOGLE DRIVE SETUP
	# ============================================
	@st.cache_resource
	def download_from_gdrive():
	    """Download and extract the project data archive from Google Drive.

	    Nothing is downloaded when ``data/processed_jobs.parquet`` and the
	    ``models`` directory already exist. Otherwise the archive is fetched
	    with ``gdown``, unpacked into the current working directory and then
	    deleted -- whether or not extraction succeeded -- so a broken archive
	    is never left behind on disk.

	    Returns:
	        A ``(success, message)`` tuple. ``success`` is ``False`` when the
	        download produced no file or when any step raised; ``message`` is
	        the text to show to the user.

	    NOTE(review): the function is wrapped in ``st.cache_resource``, so the
	    returned tuple -- presumably including a failure tuple -- is reused on
	    later reruns. Confirm that a failed download can be retried without
	    restarting the app.
	    """
	    # Google Drive file ID of the zipped project data.
	    GDRIVE_FILE_ID = "1mUUvKpFX1usIpLu-dSiYc-F6Zogfw95o"

	    # Temporary location of the downloaded archive.
	    zip_path = "project_data.zip"

	    # Skip the download when the extracted data is already on disk.
	    data_exists = (
	        Path("data/processed_jobs.parquet").exists()
	        and Path("models").exists()
	    )
	    if data_exists:
	        return True, "Data already loaded"

	    try:
	        st.info("📥 Downloading data from Google Drive (1.2GB)...")
	        st.info("This is a one-time download and will be cached. Please wait...")

	        url = f"https://drive.google.com/uc?id={GDRIVE_FILE_ID}"

	        # gdown.download is called without any progress hook, so the bar
	        # simply goes from 0 to 100 once the call has returned.
	        progress_bar = st.progress(0)
	        status_text = st.empty()

	        try:
	            output = gdown.download(url, zip_path, quiet=False)
	            if output is None:
	                return False, "Failed to download. Please check if Google Drive link is public."

	            progress_bar.progress(100)
	            status_text.text("Download complete!")

	            # Extract member by member so the progress bar can advance.
	            st.info("📦 Extracting files...")
	            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
	                members = zip_ref.infolist()
	                total_files = len(members)
	                extract_progress = st.progress(0)

	                for done, member in enumerate(members, start=1):
	                    zip_ref.extract(member, ".")
	                    extract_progress.progress(done / total_files)
	        finally:
	            # Remove the archive on success and on failure alike; a zip
	            # that failed to extract would otherwise stay on disk.
	            if os.path.exists(zip_path):
	                os.remove(zip_path)

	        return True, "✓ Data downloaded and ready!"

	    except Exception as e:
	        # Top-level boundary: every failure becomes a user-facing message.
	        return False, f"Error: {str(e)}\n\nPlease ensure:\n1. Google Drive link is set to 'Anyone with the link'\n2. File ID is correct\n3. File is not corrupted"

	# Fetch the project data before anything else; the app stops here when
	# the data could not be obtained.
	download_success, download_message = download_from_gdrive()

	if download_success:
	    st.success(download_message)
	else:
	    st.error("❌ Failed to load data")
	    st.error(download_message)
	    # Explain how to publish the archive and where the file ID comes from.
	    st.info("""
	Setup Instructions:
	1. Upload your 1.2GB zip file to Google Drive
	2. Right-click → Get link → Set to "Anyone with the link"
	3. Copy the file ID from the link
	4. Update GDRIVE_FILE_ID in the code

	Example link: `https://drive.google.com/file/d/1a2b3c4d5e6f7g8h9i0j/view`
	File ID: `1a2b3c4d5e6f7g8h9i0j`
	""")
	    st.stop()

	# ============================================
	# SESSION STATE, PROJECT MODULES AND MODEL LOADING
	# ============================================

	# Seed the session state with defaults; keys that are already present
	# keep their current values.
	_SESSION_DEFAULTS = {
	    'user_profile': {
	        'skills': [],
	        'experience_level': 2,
	        'min_salary': 0,
	        'city': '',
	        'state': '',
	        'remote_only': False,
	        'company_size': -1,
	        'benefits': []
	    },
	    'search_results': None,
	    'selected_job': None,
	}
	for _key, _default in _SESSION_DEFAULTS.items():
	    if _key not in st.session_state:
	        st.session_state[_key] = _default

	# Project modules used by the rest of the app; show a hint and stop when
	# they cannot be imported.
	# NOTE(review): the data archive is extracted into the working directory;
	# confirm that directory is on sys.path if these modules ship in the zip.
	try:
	    from skill_extractor import SkillExtractor
	    from ranking_pipeline import RankingPipeline
	except ImportError as exc:
	    st.error(f"Error importing modules: {exc}")
	    st.info("Make sure skill_extractor.py and ranking_pipeline.py are in your zip file")
	    st.stop()

	# Cache data loading
	@st.cache_resource
	def load_models_and_data():
	    """Build the ranking pipeline, the job table and the skill extractor.

	    Reads ``data/processed_jobs.parquet``, attaches it to a
	    ``RankingPipeline`` created with ``model_dir='models'``, loads the
	    pipeline's indices and LTR model, and creates a ``SkillExtractor``.

	    Returns:
	        The tuple ``(pipeline, jobs_df, skill_extractor)``.

	    Raises:
	        Exception: any error raised by the steps above is reported with
	            ``st.error`` and then re-raised.
	    """
	    try:
	        # Job postings table.
	        job_table = pd.read_parquet('data/processed_jobs.parquet')

	        # Ranking pipeline backed by the files under ``models``.
	        ranker = RankingPipeline(model_dir='models')
	        ranker.jobs_df = job_table
	        ranker.load_indices()
	        ranker.load_ltr_model()

	        extractor = SkillExtractor()
	    except Exception as exc:
	        st.error(f"Error loading models: {exc}")
	        raise
	    return ranker, job_table, extractor

	# Load models and data up front; on failure show the archive layout the
	# app expects and halt the script.
	try:
	    pipeline, jobs_df, skill_extractor = load_models_and_data()
	except Exception as exc:
	    st.error(f"Error loading data: {exc}")
	    st.info("Please check that your zip file contains:")
	    st.code("""
	├── data/
	│ └── processed_jobs.parquet
	├── models/
	│ └── (your model files)
	├── skill_extractor.py
	└── ranking_pipeline.py
	""")
	    data_loaded = False
	    st.stop()
	else:
	    data_loaded = True

	# ============================================
	# USER INTERFACE
	# ============================================

	# Sidebar: user profile inputs.
	with st.sidebar:
	    st.markdown("### 👤 User Profile")

	    # Skill picker; the options come from the skill extractor when the
	    # data was loaded, otherwise the list is empty.
	    st.markdown("#### Skills")
	    if data_loaded:
	        all_skills = skill_extractor.get_all_skills()
	    else:
	        all_skills = []

	    # The chosen skills are written back to the profile in session state.
	    _profile = st.session_state.user_profile
	    selected_skills = st.multiselect(
	        "Select your skills",
	        options=all_skills,
	        default=_profile['skills'],
	        help="Start typing to search skills"
	    )
	    _profile['skills'] = selected_skills

	    # ... (remaining sidebar controls are not included in this file)

	# Main content: page title and tagline, styled by the CSS classes
	# main-header and sub-header.
	st.markdown('<h1 class="main-header">💼 Skills-Based Job Matching System</h1>', unsafe_allow_html=True)
	st.markdown('<p class="sub-header">Find your perfect job match using AI-powered ranking</p>', unsafe_allow_html=True)

	# ... (remaining page content is not included in this file)