Spaces:

meesamraza
/

Programming_Developer_Advisor_Chatbot

Sleeping

App Files Files Community

meesamraza committed on Dec 6, 2025

Commit

d915eee

verified ·

1 Parent(s): 5115d95

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -75

app.py CHANGED Viewed

@@ -1,21 +1,29 @@
 import streamlit as st
 import pandas as pd
 import io
 import os
-import fitz  # PyMuPDF
 import docx2txt
 from groq import Groq
 from dotenv import load_dotenv
 from pydantic import BaseModel, Field
-# Test message to verify app is running
-st.title("🛠️ Resume Analyzer App")
-st.write("If you can see this, the app is running!")
-load_dotenv('.env')  # Look for .env in the same directory as the script
 GROQ_API_KEY = os.getenv("GROQ_API_KEY")
 # Initialize Groq Client
 if GROQ_API_KEY:
     try:
@@ -24,18 +32,13 @@ if GROQ_API_KEY:
         st.error(f"Error initializing Groq Client: {e}")
         st.stop()
 else:
-    st.error("GROQ_API_KEY not found. Please ensure the .env file is in the project root and contains your key.")
     st.stop()
-# Admin Password (as requested)
-ADMIN_PASSWORD = "admin"
 # Initialize Session State
 if 'is_admin_logged_in' not in st.session_state:
     st.session_state.is_admin_logged_in = False
 if 'analyzed_data' not in st.session_state:
     initial_cols = [
         'Name', 'Job Role', 'Resume Score (100)', 'Email', 'Phone', 'Shortlisted',
         'Experience Summary', 'Education Summary', 'Communication Rating (1-10)',
@@ -55,11 +58,14 @@ class ResumeAnalysis(BaseModel):
     certifications: list[str] = Field(description="List of professional certifications (e.g., PMP, AWS Certified).")
     experience_summary: str = Field(description="A concise summary of the candidate's professional experience.")
     education_summary: str = Field(description="A concise summary of the candidate's highest education.")
-    communication_skills: str = Field(description="A rating (1-10) or brief description of communication skills based on the resume language.")
     technical_skills: list[str] = Field(description="List of technical skills/technologies mentioned (e.g., Python, SQL, Docker).")
-    aba_therapy_skills: str = Field(description="Specific mention or score (1-10) for ABA Therapy skills, ONLY if the role is 'Therapist'.")
-    rbt_bcba_certification: str = Field(description="Indicate 'Yes' or 'No' if RBT/BCBA certification is mentioned, ONLY if the role is 'Therapist'.")
-    autism_care_experience_score: str = Field(description="A score (1-10) for Autism-Care Experience, ONLY if the role is 'Therapist'.")
 # --- 3. HELPER FUNCTIONS ---
@@ -68,19 +74,17 @@ def extract_text_from_file(uploaded_file):
     file_type = uploaded_file.type
     try:
         if file_type == "application/pdf":
-            # Use PyMuPDF for PDF
             with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
                 text = ""
                 for page in doc:
                     text += page.get_text()
             return text
         elif file_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
-            # Use docx2txt for DOCX
             return docx2txt.process(uploaded_file)
         else:
             return ""
     except Exception as e:
-        st.error(f"Error extracting text: {e}")
         return ""
 @st.cache_data(show_spinner="Analyzing resume with Groq...")
@@ -93,8 +97,14 @@ def analyze_resume_with_groq(resume_text: str, job_role: str) -> ResumeAnalysis:
         therapist_instructions = (
             "Because the job role is 'Therapist', you MUST carefully look for: "
             "1. ABA Therapy Skills, RBT/BCBA Certification, and Autism-Care Experience. "
-            "2. Provide a score from 1-10 for the specialized fields: 'aba_therapy_skills' and 'autism_care_experience_score'. "
-            "3. Set 'rbt_bcba_certification' to 'Yes' or 'No'."
         )
     # System Prompt for Groq
@@ -102,6 +112,7 @@ def analyze_resume_with_groq(resume_text: str, job_role: str) -> ResumeAnalysis:
         f"You are a professional Resume Analyzer. Your task is to extract specific information from the provided resume text. "
         f"The candidate is applying for the role of '{job_role}'. "
         f"Follow the instructions precisely and return a JSON object that strictly adheres to the provided Pydantic schema. "
         f"For skills, provide a list of 5-10 most relevant items. {therapist_instructions}"
     )
@@ -121,62 +132,55 @@ def analyze_resume_with_groq(resume_text: str, job_role: str) -> ResumeAnalysis:
         return analysis
     except Exception as e:
-        st.error(f"Groq API Error: {e}")
         # Return an empty/default analysis object on failure
-        return ResumeAnalysis(name="Extraction Failed", email="", phone="", certifications=[], experience_summary="", education_summary="", communication_skills="0", technical_skills=[], aba_therapy_skills="0", rbt_bcba_certification="No", autism_care_experience_score="0")
 def calculate_resume_score(analysis: ResumeAnalysis) -> float:
     """Calculates the weighted score out of 100."""
-    # Weights for maximum possible score contribution:
-    # Experience (40%), Skills (30%), Communication (20%), Certifications (10%)
     total_score = 0.0
     # 1. Experience Score (Max 40 points)
-    # Simple heuristic: longer summary means more experience found.
-    # Max score is 40.
-    exp_factor = min(len(analysis.experience_summary) / 100.0, 1.0) # Use 100 chars as the max point
     total_score += exp_factor * 40.0
     # 2. Skills Score (Max 30 points)
-    # Based on number of skills found (up to 10 relevant skills)
-    # Max score is 30.
     skills_factor = min(len(analysis.technical_skills) / 10.0, 1.0)
     total_score += skills_factor * 30.0
     # 3. Communication Score (Max 20 points)
-    # Assuming 'communication_skills' is a score string '1-10' from Groq
     try:
-        # Tries to extract the first number from the string (e.g., '7-High' -> 7)
-        comm_rating = float(analysis.communication_skills.split('-')[0].strip())
     except (ValueError, IndexError):
-        comm_rating = 5.0 # Default if Groq returns unparsable text
-    score_comm = (comm_rating / 10.0) * 20.0 # Scale 1-10 rating to max 20 points
     total_score += score_comm
     # 4. Certification Score (Max 10 points)
-    # Each certification adds a point, max 10 certs.
     score_cert = min(len(analysis.certifications), 10) * 1.0
     total_score += score_cert
     # --- Therapist-Specific Bonus Checks ---
     if st.session_state.get('selected_role') == "Therapist":
-        # Additional points based on specialized scores (e.g., up to 5 points bonus)
         try:
-            aba_score = float(analysis.aba_therapy_skills.split('-')[0].strip())
-            autism_score = float(analysis.autism_care_experience_score.split('-')[0].strip())
             # Add a bonus based on the average specialized scores (max 10 points)
             specialized_bonus = ((aba_score + autism_score) / 20.0) * 10.0
             total_score += specialized_bonus
         except (ValueError, IndexError):
-            pass # Ignore if specialized scores are not numbers
-    # Final cleanup and capping
     final_score = round(min(total_score, 100))
     return float(final_score)
@@ -184,19 +188,14 @@ def calculate_resume_score(analysis: ResumeAnalysis) -> float:
 def append_analysis_to_dataframe(job_role: str, analysis: ResumeAnalysis, score: float):
     """Formats and appends the new analysis to the session state DataFrame."""
-    # Convert Pydantic model to dictionary
     data = analysis.dict()
-    # Add computed and derived fields
     data['Job Role'] = job_role
     data['Resume Score'] = score
-    data['Shortlisted'] = 'No' # Default status
-    # Clean up list fields for display/Excel
     technical_skills_list = ", ".join(data['technical_skills'])
     certifications_list = ", ".join(data['certifications'])
-    # The new row data
     df_data = {
         'Name': data['name'],
         'Job Role': job_role,
@@ -214,18 +213,14 @@ def append_analysis_to_dataframe(job_role: str, analysis: ResumeAnalysis, score:
         'Autism-Care Exp (1-10)': data['autism_care_experience_score'],
     }
-    # Convert to a single-row DataFrame and concatenate
     new_df = pd.DataFrame([df_data])
     st.session_state.analyzed_data = pd.concat([st.session_state.analyzed_data, new_df], ignore_index=True)
 # --- 4. APP LAYOUT AND LOGIC ---
-st.set_page_config(layout="wide", page_title="Quantum Scrutiny Platform | Groq-Powered")
 st.title("🌌 Quantum Scrutiny Platform: AI Resume Analysis")
-# --- Tabs for User and Admin ---
 tab_user, tab_admin = st.tabs(["👤 Resume Uploader (User Panel)", "🔒 Admin Dashboard (Password Protected)"])
 # =========================================================================
@@ -235,15 +230,13 @@ with tab_user:
     st.header("Upload Resumes for Analysis")
     st.info("Upload multiple PDF or DOCX files. The Groq AI engine will quickly extract and score the key data.")
-    # Job Role Selection
     job_role_options = ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
     selected_role = st.selectbox(
         "**1. Select the Target Job Role** (Influences analysis and scoring)",
         options=job_role_options,
-        key='selected_role' # Store role in session state for scoring logic
     )
-    # File Uploader
     uploaded_files = st.file_uploader(
         "**2. Upload Resumes** (PDF or DOCX)",
         type=["pdf", "docx"],
@@ -257,7 +250,6 @@ with tab_user:
             total_files = len(uploaded_files)
             progress_bar = st.progress(0)
-            # Clear previous individual file analysis displays
             st.session_state.individual_analysis = []
             with st.status("Processing Resumes...", expanded=True) as status_box:
@@ -266,27 +258,21 @@ with tab_user:
                     file_name = file.name
                     st.write(f"Analyzing **{file_name}**...")
-                    # 1. Extract Text
                     resume_text = extract_text_from_file(file)
                     if not resume_text:
                         st.error(f"Could not extract text from {file_name}. Skipping.")
                         continue
-                    # 2. Analyze with Groq
                     analysis = analyze_resume_with_groq(resume_text, selected_role)
                     if analysis.name == "Extraction Failed":
                          st.error(f"Groq extraction failed for {file_name}. Skipping.")
                          continue
-                    # 3. Calculate Score
                     score = calculate_resume_score(analysis)
-                    # 4. Store Data
                     append_analysis_to_dataframe(selected_role, analysis, score)
-                    # Store data for individual display below
                     st.session_state.individual_analysis.append({
                         'name': analysis.name,
                         'score': score,
@@ -294,14 +280,12 @@ with tab_user:
                         'file_name': file_name
                     })
-                    # Update progress
                     progress_bar.progress((i + 1) / total_files)
                 status_box.update(label="Analysis Complete!", state="complete", expanded=False)
             st.success(f"**✅ Successfully analyzed {total_files} resumes.**")
-    # Display results of the last batch of analysis
     if 'individual_analysis' in st.session_state and st.session_state.individual_analysis:
         st.subheader("Last Analysis Summary")
         for item in st.session_state.individual_analysis:
@@ -315,7 +299,6 @@ with tab_user:
 # =========================================================================
 with tab_admin:
-    # --- Login Logic ---
     if not st.session_state.is_admin_logged_in:
         st.header("Admin Login")
         password = st.text_input("Enter Admin Password", type="password")
@@ -325,9 +308,8 @@ with tab_admin:
                 st.rerun()
             else:
                 st.error("Incorrect password.")
-        st.stop() # Stop execution until logged in
-    # --- Dashboard Content (Logged In) ---
     st.header("🎯 Recruitment Dashboard")
     st.markdown("---")
@@ -340,14 +322,11 @@ with tab_admin:
     else:
         df = st.session_state.analyzed_data.copy()
-        # --- 1. Shortlisting & Data Display ---
         st.subheader("Candidate Data Table")
         st.success(f"**Total Candidates Analyzed: {len(df)}**")
-        # Key columns for display
         display_cols = ['Name', 'Job Role', 'Resume Score (100)', 'Shortlisted', 'Email', 'Skills/Technologies']
-        # Editable Data Table (allowing admin to change 'Shortlisted' status)
         edited_df = st.data_editor(
             df[display_cols],
             column_config={
@@ -362,19 +341,13 @@ with tab_admin:
             hide_index=True
         )
-        # Update the session state DataFrame with the edited shortlisting status
-        # This keeps the changes persistent
         st.session_state.analyzed_data['Shortlisted'] = edited_df['Shortlisted']
         st.markdown("---")
-        # --- 2. Download Excel File ---
         st.subheader("📥 Download Data")
-        # The full DataFrame to export
         df_export = st.session_state.analyzed_data.copy()
-        # Create an in-memory Excel file buffer
         excel_buffer = io.BytesIO()
         with pd.ExcelWriter(excel_buffer, engine='openpyxl') as writer:
             df_export.to_excel(writer, index=False, sheet_name='Resume Analysis Data')

+# src/streamlit_app.py
 import streamlit as st
 import pandas as pd
 import io
 import os
+import fitz
 import docx2txt
 from groq import Groq
 from dotenv import load_dotenv
 from pydantic import BaseModel, Field
+# --- 0. FIX: SET PAGE CONFIG AS THE FIRST STREAMLIT COMMAND ---
+st.set_page_config(layout="wide", page_title="Quantum Scrutiny Platform | Groq-Powered")
+# --- 1. CONFIGURATION AND INITIALIZATION ---
+# FIX for .env on local machine: Load environment variables by explicitly pointing up one directory.
+load_dotenv(os.path.join(os.path.dirname(__file__), '..', '.env'))
+# FIX for Hugging Face Deployment: Read the key from the environment/Secrets.
 GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+# Admin Password (as requested)
+ADMIN_PASSWORD = "admin"
 # Initialize Groq Client
 if GROQ_API_KEY:
     try:
         st.error(f"Error initializing Groq Client: {e}")
         st.stop()
 else:
+    st.error("GROQ_API_KEY not found. Please ensure the key is set as a Secret in Hugging Face or in the local .env file.")
     st.stop()
 # Initialize Session State
 if 'is_admin_logged_in' not in st.session_state:
     st.session_state.is_admin_logged_in = False
 if 'analyzed_data' not in st.session_state:
     initial_cols = [
         'Name', 'Job Role', 'Resume Score (100)', 'Email', 'Phone', 'Shortlisted',
         'Experience Summary', 'Education Summary', 'Communication Rating (1-10)',
     certifications: list[str] = Field(description="List of professional certifications (e.g., PMP, AWS Certified).")
     experience_summary: str = Field(description="A concise summary of the candidate's professional experience.")
     education_summary: str = Field(description="A concise summary of the candidate's highest education.")
+    # --- FIX 1: Explicitly describe required STRING output format ---
+    communication_skills: str = Field(description="A score as a STRING (e.g., '8') or brief description of communication skills.")
     technical_skills: list[str] = Field(description="List of technical skills/technologies mentioned (e.g., Python, SQL, Docker).")
+    aba_therapy_skills: str = Field(description="Specific mention or score as a STRING (e.g., '7') for ABA Therapy skills, ONLY if the role is 'Therapist'. Use the STRING 'N/A' if not applicable or found.")
+    rbt_bcba_certification: str = Field(description="Indicate the STRING 'Yes' or 'No' if RBT/BCBA certification is mentioned, ONLY if the role is 'Therapist'. Use the STRING 'N/A' if not applicable or found.")
+    autism_care_experience_score: str = Field(description="A score as a STRING (e.g., '9') for Autism-Care Experience, ONLY if the role is 'Therapist'. Use the STRING 'N/A' if not applicable or found.")
 # --- 3. HELPER FUNCTIONS ---
     file_type = uploaded_file.type
     try:
         if file_type == "application/pdf":
             with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
                 text = ""
                 for page in doc:
                     text += page.get_text()
             return text
         elif file_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
             return docx2txt.process(uploaded_file)
         else:
             return ""
     except Exception as e:
+        print(f"Error extracting text: {e}")
         return ""
 @st.cache_data(show_spinner="Analyzing resume with Groq...")
         therapist_instructions = (
             "Because the job role is 'Therapist', you MUST carefully look for: "
             "1. ABA Therapy Skills, RBT/BCBA Certification, and Autism-Care Experience. "
+            "2. Provide a score from 1-10 as a **STRING** (e.g., '7') for the specialized fields: 'aba_therapy_skills' and 'autism_care_experience_score'. "
+            "3. If any specialized therapist field is not found, you MUST use the **STRING** 'N/A'. "
+            "4. Set 'rbt_bcba_certification' to the **STRING** 'Yes' or 'No'."
+        )
+    else:
+        # For non-therapist roles, explicitly instruct the model to use 'N/A' for therapist fields
+        therapist_instructions = (
+            "Since the role is not 'Therapist', set 'aba_therapy_skills', 'autism_care_experience_score', and 'rbt_bcba_certification' to the **STRING** 'N/A'."
         )
     # System Prompt for Groq
         f"You are a professional Resume Analyzer. Your task is to extract specific information from the provided resume text. "
         f"The candidate is applying for the role of '{job_role}'. "
         f"Follow the instructions precisely and return a JSON object that strictly adheres to the provided Pydantic schema. "
+        f"**IMPORTANT:** All values must be returned as the data type specified. Numerical scores must be enclosed in quotes to be treated as **STRING** types (e.g., \"8\"). "
         f"For skills, provide a list of 5-10 most relevant items. {therapist_instructions}"
     )
         return analysis
     except Exception as e:
+        # This will now only catch errors related to the API connection or Pydantic structural errors
+        # (e.g., list vs string), not the common type mismatches.
+        st.error(f"Groq API Error: {e}")
         # Return an empty/default analysis object on failure
+        return ResumeAnalysis(name="Extraction Failed", email="", phone="", certifications=[], experience_summary="", education_summary="", communication_skills="N/A", technical_skills=[], aba_therapy_skills="N/A", rbt_bcba_certification="N/A", autism_care_experience_score="N/A")
 def calculate_resume_score(analysis: ResumeAnalysis) -> float:
     """Calculates the weighted score out of 100."""
     total_score = 0.0
     # 1. Experience Score (Max 40 points)
+    exp_factor = min(len(analysis.experience_summary) / 100.0, 1.0)
     total_score += exp_factor * 40.0
     # 2. Skills Score (Max 30 points)
     skills_factor = min(len(analysis.technical_skills) / 10.0, 1.0)
     total_score += skills_factor * 30.0
     # 3. Communication Score (Max 20 points)
     try:
+        # Safely parse the communication score string (e.g., '8' or '8-High')
+        score_str = analysis.communication_skills.split('-')[0].strip()
+        comm_rating = float(score_str)
     except (ValueError, IndexError):
+        comm_rating = 5.0 # Default if unparsable
+    score_comm = (comm_rating / 10.0) * 20.0
     total_score += score_comm
     # 4. Certification Score (Max 10 points)
     score_cert = min(len(analysis.certifications), 10) * 1.0
     total_score += score_cert
     # --- Therapist-Specific Bonus Checks ---
     if st.session_state.get('selected_role') == "Therapist":
         try:
+            # Safely parse specialized scores, handling 'N/A'
+            aba_score = float(analysis.aba_therapy_skills.split('-')[0].strip()) if analysis.aba_therapy_skills != 'N/A' else 0.0
+            autism_score = float(analysis.autism_care_experience_score.split('-')[0].strip()) if analysis.autism_care_experience_score != 'N/A' else 0.0
             # Add a bonus based on the average specialized scores (max 10 points)
             specialized_bonus = ((aba_score + autism_score) / 20.0) * 10.0
             total_score += specialized_bonus
         except (ValueError, IndexError):
+            pass
     final_score = round(min(total_score, 100))
     return float(final_score)
 def append_analysis_to_dataframe(job_role: str, analysis: ResumeAnalysis, score: float):
     """Formats and appends the new analysis to the session state DataFrame."""
     data = analysis.dict()
     data['Job Role'] = job_role
     data['Resume Score'] = score
+    data['Shortlisted'] = 'No'
     technical_skills_list = ", ".join(data['technical_skills'])
     certifications_list = ", ".join(data['certifications'])
     df_data = {
         'Name': data['name'],
         'Job Role': job_role,
         'Autism-Care Exp (1-10)': data['autism_care_experience_score'],
     }
     new_df = pd.DataFrame([df_data])
     st.session_state.analyzed_data = pd.concat([st.session_state.analyzed_data, new_df], ignore_index=True)
 # --- 4. APP LAYOUT AND LOGIC ---
 st.title("🌌 Quantum Scrutiny Platform: AI Resume Analysis")
 tab_user, tab_admin = st.tabs(["👤 Resume Uploader (User Panel)", "🔒 Admin Dashboard (Password Protected)"])
 # =========================================================================
     st.header("Upload Resumes for Analysis")
     st.info("Upload multiple PDF or DOCX files. The Groq AI engine will quickly extract and score the key data.")
     job_role_options = ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
     selected_role = st.selectbox(
         "**1. Select the Target Job Role** (Influences analysis and scoring)",
         options=job_role_options,
+        key='selected_role'
     )
     uploaded_files = st.file_uploader(
         "**2. Upload Resumes** (PDF or DOCX)",
         type=["pdf", "docx"],
             total_files = len(uploaded_files)
             progress_bar = st.progress(0)
             st.session_state.individual_analysis = []
             with st.status("Processing Resumes...", expanded=True) as status_box:
                     file_name = file.name
                     st.write(f"Analyzing **{file_name}**...")
                     resume_text = extract_text_from_file(file)
                     if not resume_text:
                         st.error(f"Could not extract text from {file_name}. Skipping.")
                         continue
                     analysis = analyze_resume_with_groq(resume_text, selected_role)
                     if analysis.name == "Extraction Failed":
                          st.error(f"Groq extraction failed for {file_name}. Skipping.")
                          continue
                     score = calculate_resume_score(analysis)
                     append_analysis_to_dataframe(selected_role, analysis, score)
                     st.session_state.individual_analysis.append({
                         'name': analysis.name,
                         'score': score,
                         'file_name': file_name
                     })
                     progress_bar.progress((i + 1) / total_files)
                 status_box.update(label="Analysis Complete!", state="complete", expanded=False)
             st.success(f"**✅ Successfully analyzed {total_files} resumes.**")
     if 'individual_analysis' in st.session_state and st.session_state.individual_analysis:
         st.subheader("Last Analysis Summary")
         for item in st.session_state.individual_analysis:
 # =========================================================================
 with tab_admin:
     if not st.session_state.is_admin_logged_in:
         st.header("Admin Login")
         password = st.text_input("Enter Admin Password", type="password")
                 st.rerun()
             else:
                 st.error("Incorrect password.")
+        st.stop()
     st.header("🎯 Recruitment Dashboard")
     st.markdown("---")
     else:
         df = st.session_state.analyzed_data.copy()
         st.subheader("Candidate Data Table")
         st.success(f"**Total Candidates Analyzed: {len(df)}**")
         display_cols = ['Name', 'Job Role', 'Resume Score (100)', 'Shortlisted', 'Email', 'Skills/Technologies']
         edited_df = st.data_editor(
             df[display_cols],
             column_config={
             hide_index=True
         )
         st.session_state.analyzed_data['Shortlisted'] = edited_df['Shortlisted']
         st.markdown("---")
         st.subheader("📥 Download Data")
         df_export = st.session_state.analyzed_data.copy()
         excel_buffer = io.BytesIO()
         with pd.ExcelWriter(excel_buffer, engine='openpyxl') as writer:
             df_export.to_excel(writer, index=False, sheet_name='Resume Analysis Data')