Spaces:
Sleeping
Sleeping
| # streamlit_app.py (moved from app.py) | |
| import streamlit as st | |
| import requests | |
| from bs4 import BeautifulSoup | |
| import pandas as pd | |
| import pdfplumber | |
| from docx import Document | |
| from io import BytesIO | |
| import os | |
| from dotenv import load_dotenv | |
| from groq import Groq | |
| from urllib.parse import quote | |
| import pytesseract | |
| from PIL import Image | |
| # --- Load API Key --- | |
# Read GROQ_API_KEY from the .env file one directory above this script.
_env_file = os.path.join(os.path.dirname(__file__), '..', '.env')
load_dotenv(dotenv_path=_env_file)
groq_api = os.getenv("GROQ_API_KEY")

# `client` stays None when no key is configured or the SDK refuses to
# initialise; match_resume_with_jobs() checks for that before calling the API.
client = None
if groq_api:
    try:
        client = Groq(api_key=groq_api)
    except Exception as e:
        # NOTE(review): this runs before st.set_page_config() further down;
        # some Streamlit versions require set_page_config to be the first
        # Streamlit command - confirm against the deployed version.
        st.error(f"Failed to initialize Groq client: {str(e)}")
| # --- Resume Parser --- | |
def extract_text(file):
    """Extract plain text from an uploaded PDF or DOCX resume.

    Args:
        file: File-like object exposing a ``name`` attribute (this script
            passes the value returned by ``st.file_uploader``). The file
            extension, compared case-insensitively, selects the parser.

    Returns:
        The extracted text (possibly an empty string), or ``None`` when the
        file type is unsupported or parsing raised. Problems are reported to
        the user through ``st.warning`` / ``st.error`` rather than raised.
    """
    # Compare in lower case so "Resume.PDF" / "CV.DOCX" are accepted too.
    filename = file.name.lower()

    if filename.endswith(".pdf"):
        try:
            with pdfplumber.open(file) as pdf:
                text = "\n".join(page.extract_text() or "" for page in pdf.pages)
                if text.strip():
                    return text

                st.warning("⚠️ No text extracted from PDF. Attempting OCR...")
                # OCR fallback. `to_image(...).original` is already a PIL
                # image, so it is handed to pytesseract directly; wrapping it
                # in Image.open() (which expects a path or file object) made
                # this fallback fail every time.
                return "\n".join(
                    pytesseract.image_to_string(
                        page.to_image(resolution=300).original
                    )
                    for page in pdf.pages
                )
        except Exception as e:
            # Best-effort boundary: any parser/OCR failure is shown in the UI
            # and reported to the caller as None.
            st.error(f"PDF error: {e}")
            return None

    if filename.endswith(".docx"):
        try:
            text = "\n".join(p.text for p in Document(file).paragraphs)
        except Exception as e:
            st.error(f"DOCX error: {e}")
            return None
        if not text.strip():
            st.warning("⚠️ DOCX file has no readable text.")
        return text

    st.warning("Unsupported file type.")
    return None
| # --- Dice Scraper --- | |
def scrape_dice_jobs(title, loc):
    """Scrape the Dice job-search page for ``title`` in ``loc``.

    Args:
        title: Job title / keywords; URL-quoted into the ``q`` parameter.
        loc: Location text; URL-quoted into the ``location`` parameter.

    Returns:
        A DataFrame with ``Title``, ``Company``, ``Location`` and ``Link``
        columns, one row per result card that has both a title link and a
        company. The frame is empty (but keeps those columns) when the
        request fails or no card matches the selectors.
    """
    site = "https://www.dice.com"
    columns = ["Title", "Company", "Location", "Link"]
    headers = {"User-Agent": "Mozilla/5.0"}
    search_url = f"{site}/jobs?q={quote(title)}&location={quote(loc)}"

    try:
        # A timeout keeps a stalled connection from hanging the app forever.
        response = requests.get(search_url, headers=headers, timeout=15)
        response.raise_for_status()
    except requests.RequestException as exc:
        st.warning(f"Dice request failed: {exc}")
        return pd.DataFrame(columns=columns)

    soup = BeautifulSoup(response.text, "html.parser")
    jobs = []
    for card in soup.select("div.card"):
        title_tag = card.select_one("a.card-title-link")
        company_tag = card.select_one("span.card-company")
        if title_tag is None or company_tag is None:
            continue
        location_tag = card.select_one("span.card-location")
        # .get() avoids a KeyError when the anchor carries no href.
        href = title_tag.get("href")
        if not href:
            link = ""
        elif href.startswith(("http://", "https://")):
            # Already absolute: do not prefix the site a second time.
            link = href
        else:
            link = f"{site}{href}"
        jobs.append({
            "Title": title_tag.text.strip(),
            "Company": company_tag.text.strip(),
            "Location": location_tag.text.strip() if location_tag else "",
            "Link": link,
        })
    return pd.DataFrame(jobs, columns=columns)
| # --- LinkedIn Scraper --- | |
def scrape_linkedin_jobs(title, loc):
    """Scrape the public LinkedIn job-search page for ``title`` in ``loc``.

    Args:
        title: Job title / keywords; URL-quoted into ``keywords``.
        loc: Location text; URL-quoted into ``location``.

    Returns:
        A DataFrame with ``Title``, ``Company``, ``Location`` and ``Link``
        columns, one row per ``.base-search-card`` element that has both an
        ``h3`` and an ``h4``. The frame is empty (but keeps those columns)
        when the request fails or nothing matches.
    """
    columns = ["Title", "Company", "Location", "Link"]
    headers = {"User-Agent": "Mozilla/5.0"}
    search_url = (
        "https://www.linkedin.com/jobs/search/"
        f"?keywords={quote(title)}&location={quote(loc)}"
    )

    try:
        # A timeout keeps a stalled connection from hanging the app forever.
        response = requests.get(search_url, headers=headers, timeout=15)
        response.raise_for_status()
    except requests.RequestException as exc:
        st.warning(f"LinkedIn request failed: {exc}")
        return pd.DataFrame(columns=columns)

    soup = BeautifulSoup(response.text, "html.parser")
    jobs = []
    for card in soup.select(".base-search-card"):
        title_tag = card.select_one("h3")
        company_tag = card.select_one("h4")
        if title_tag is None or company_tag is None:
            continue
        location_tag = card.select_one(".job-search-card__location")
        # Look the anchor up once; .get() tolerates an anchor without href.
        anchor = card.select_one("a")
        href = anchor.get("href") if anchor is not None else None
        jobs.append({
            "Title": title_tag.text.strip(),
            "Company": company_tag.text.strip(),
            "Location": location_tag.text.strip() if location_tag else "",
            "Link": href or "",
        })
    return pd.DataFrame(jobs, columns=columns)
| # --- Glassdoor Scraper --- | |
def scrape_glassdoor_jobs(title, loc):
    """Scrape the Glassdoor job-search page for ``title`` in ``loc``.

    Args:
        title: Job title / keywords; URL-quoted into ``sc.keyword``.
        loc: Location text; URL-quoted into ``locKeyword``.

    Returns:
        A DataFrame with ``Title``, ``Company``, ``Location`` and ``Link``
        columns, one row per ``.react-job-listing`` element that has both a
        job link and a header. The frame is empty (but keeps those columns)
        when the request fails or nothing matches.
    """
    site = "https://www.glassdoor.com"
    columns = ["Title", "Company", "Location", "Link"]
    headers = {"User-Agent": "Mozilla/5.0"}
    search_url = (
        f"{site}/Job/jobs.htm?sc.keyword={quote(title)}"
        f"&locT=C&locId=0&locKeyword={quote(loc)}"
    )

    try:
        # A timeout keeps a stalled connection from hanging the app forever.
        response = requests.get(search_url, headers=headers, timeout=15)
        response.raise_for_status()
    except requests.RequestException as exc:
        st.warning(f"Glassdoor request failed: {exc}")
        return pd.DataFrame(columns=columns)

    soup = BeautifulSoup(response.text, "html.parser")
    jobs = []
    for card in soup.select(".react-job-listing"):
        title_tag = card.select_one("a.jobLink")
        company_tag = card.select_one("div.jobHeader")
        if title_tag is None or company_tag is None:
            continue
        location_tag = card.select_one("span.pr-xxsm")
        # .get() avoids a KeyError when the anchor carries no href.
        href = title_tag.get("href")
        if not href:
            link = ""
        elif href.startswith(("http://", "https://")):
            # Already absolute: do not prefix the site a second time.
            link = href
        else:
            link = f"{site}{href}"
        jobs.append({
            "Title": title_tag.text.strip(),
            "Company": company_tag.text.strip(),
            "Location": location_tag.text.strip() if location_tag else "",
            "Link": link,
        })
    return pd.DataFrame(jobs, columns=columns)
| # --- AI Matching --- | |
def match_resume_with_jobs(resume_text, jobs_df):
    """Ask the Groq chat model to score each job against the resume.

    Args:
        resume_text: Resume text; only the first 2000 characters are sent.
        jobs_df: DataFrame with at least ``Title`` and ``Company`` columns.
            It is not modified.

    Returns:
        A new DataFrame equal to ``jobs_df`` plus a ``MatchScore`` column.
        Each value is the model's stripped reply as a string, ``"Error"``
        when the request for that row failed, or ``"API Error"`` on every
        row when no client is configured.
    """
    if not client:
        return jobs_df.assign(MatchScore="API Error")

    # Loop-invariant: slice the resume once rather than once per job.
    resume_excerpt = resume_text[:2000]

    results = []
    for job_title, company in zip(jobs_df["Title"], jobs_df["Company"]):
        prompt = (
            "\n"
            "Compare the following resume and job title. "
            "Rate the match from 0 to 100.\n"
            "Resume:\n"
            f"{resume_excerpt}\n"
            f"Job Title: {job_title}\n"
            f"Company: {company}\n"
            "Reply with a number only between 0 to 100.\n"
        )
        try:
            response = client.chat.completions.create(
                model="llama3-70b-8192",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.2,
                max_tokens=10,
            )
            score = response.choices[0].message.content.strip()
        except Exception:
            # Best-effort per row: one failed request must not abort the
            # whole batch, so the row is marked and the loop continues.
            score = "Error"
        results.append(score)

    # assign() returns a copy, matching the no-client branch above and
    # leaving the caller's DataFrame untouched.
    return jobs_df.assign(MatchScore=results)
| # --- UI Setup --- | |
# Page chrome.
st.set_page_config(page_title="Job Matcher Bot", layout="wide")
st.title("🧲 Job Matcher & Cover Letter Generator")

# Step 1: the resume upload lives outside the form, so choosing a file does
# not depend on pressing the submit button.
st.subheader("Step 1: Upload Resume")
resume_file = st.file_uploader(label="Upload Resume", type=["pdf", "docx"])

# Step 2: search criteria are collected in one form and read on submit.
st.subheader("Step 2: Search Jobs")
_platform_choices = ["Dice", "LinkedIn", "Glassdoor"]
with st.form(key="job_form"):
    job_title = st.text_input(label="Job Title", value="Python Developer")
    location = st.text_input(label="Location", value="Remote")
    platform = st.selectbox(
        label="Choose Job Platform",
        options=_platform_choices,
    )
    submit = st.form_submit_button(label="Search & Match")
| # --- Main Execution --- | |
# Runs only after the form was submitted with a resume attached; otherwise
# the user is prompted for input.
if submit and resume_file:
    with st.spinner("Extracting resume & jobs..."):
        resume_text = extract_text(resume_file)
        # Anything other than Dice/LinkedIn falls through to Glassdoor, the
        # only remaining option in the platform select box.
        scraper = {
            "Dice": scrape_dice_jobs,
            "LinkedIn": scrape_linkedin_jobs,
        }.get(platform, scrape_glassdoor_jobs)
        job_results = scraper(job_title, location)

    if job_results.empty:
        st.warning("❌ No jobs found. Try a different search.")
    elif not resume_text:
        st.error("❌ Failed to extract text from resume.")
    else:
        st.success(f"✅ Found {len(job_results)} jobs on {platform}. Matching with resume...")
        matched_jobs = match_resume_with_jobs(resume_text, job_results)
        # MatchScore holds strings ("85", "9", "Error"). Sorting them as text
        # would rank "9" above "85", so the sort compares numeric values and
        # pushes non-numeric entries to the bottom.
        ranked_jobs = matched_jobs.sort_values(
            "MatchScore",
            ascending=False,
            key=lambda scores: pd.to_numeric(scores, errors="coerce"),
            na_position="last",
        )
        st.dataframe(ranked_jobs, use_container_width=True)
        csv = matched_jobs.to_csv(index=False).encode('utf-8')
        st.download_button("⬇ Download Matches (CSV)", csv, "matched_jobs.csv")
else:
    st.info("📄 Please upload resume and enter search criteria to begin.")