"""ATS resume analyzer: a Gradio UI backed by the Hugging Face Inference API.

The app extracts text from an uploaded PDF or DOCX resume, sends it together
with a job description to a hosted instruction-tuned model, and displays the
JSON analysis the model returns.
"""

import json
import os
import re
from typing import Any, Dict, Union

import docx
import gradio as gr
import PyPDF2
import requests

HF_API_URL = (
    "https://api-inference.huggingface.co/models/"
    "mistralai/Mistral-7B-Instruct-v0.3"
)
REQUEST_TIMEOUT_SECONDS = 45
MAX_RESUME_CHARS = 3000
MAX_JOB_DESCRIPTION_CHARS = 1000

# Instruction part of the prompt.  It contains literal JSON braces, so it must
# never be passed through ``str.format``; it is only ever interpolated as a
# value (see ``_build_prompt``).
# NOTE(review): the bare ``<>`` markers look like system-prompt tags (e.g.
# ``<<SYS>>`` / ``<</SYS>>``) whose contents were stripped at some point;
# they are kept verbatim here -- confirm against the intended prompt format.
_PROMPT_INSTRUCTIONS = """[INST] <>
You are an ATS resume analyzer. Return ONLY valid JSON with this exact structure:
{
    "analysis": {
        "keywords": {"score": 0-100, "recommendations": []},
        "formatting": {"score": 0-100, "recommendations": []},
        "skills": {"score": 0-100, "recommendations": []},
        "experience": {"score": 0-100, "recommendations": []},
        "education": {"score": 0-100, "recommendations": []}
    },
    "overall_score": 0-100,
    "summary": "Brief overall assessment"
}

Important:
1. Generate actual scores based on content
2. Provide specific recommendations
3. Return ONLY the JSON object
4. No additional text or explanations
<>"""


# Text extraction functions
def extract_text_from_pdf(file) -> str:
    """Extract text from a PDF file.

    Args:
        file: Anything ``PyPDF2.PdfReader`` accepts (a path or a binary
            file-like object).

    Returns:
        The text of all pages joined with single spaces; pages that yield no
        text contribute an empty string.

    Raises:
        ValueError: If the PDF cannot be read or parsed.
    """
    try:
        pdf_reader = PyPDF2.PdfReader(file)
        return " ".join(page.extract_text() or "" for page in pdf_reader.pages)
    except Exception as e:
        raise ValueError(f"PDF extraction failed: {e}") from e


def extract_text_from_docx(file) -> str:
    """Extract text from a Word document.

    Args:
        file: Anything ``docx.Document`` accepts (a path or a binary
            file-like object).

    Returns:
        The non-empty paragraphs joined with newlines.

    Raises:
        ValueError: If the document cannot be read or parsed.
    """
    try:
        doc = docx.Document(file)
        return "\n".join(para.text for para in doc.paragraphs if para.text)
    except Exception as e:
        raise ValueError(f"DOCX extraction failed: {e}") from e


def process_uploaded_file(file) -> str:
    """Extract text from an uploaded resume, dispatching on its extension.

    Args:
        file: Either a filesystem path (``str`` / ``os.PathLike``), which is
            what ``gr.File(type="filepath")`` passes to the handler, or a
            file-like object exposing a ``name`` attribute.

    Returns:
        The extracted text.

    Raises:
        ValueError: If no file or filename is provided, the extension is not
            ``.pdf`` or ``.docx``, or extraction fails.
    """
    if file is None:
        raise ValueError("No file uploaded")

    if isinstance(file, (str, os.PathLike)):
        # Normalise to ``str`` so both extractors receive a plain path.
        file = os.fspath(file)
        name = file
    else:
        name = getattr(file, "name", None)

    if not name:
        raise ValueError("No filename provided")

    filename = str(name).lower()
    if filename.endswith(".pdf"):
        return extract_text_from_pdf(file)
    if filename.endswith(".docx"):
        return extract_text_from_docx(file)
    raise ValueError(
        "Unsupported file format. Please upload PDF or Word document."
    )


def extract_json_from_text(text: str) -> Union[Dict[str, Any], None]:
    """Extract a JSON object from a model response.

    The whole text is tried first.  If that is not a JSON object, each ``{``
    is tried in turn as the start of an embedded object, so surrounding prose
    (before or after the JSON) does not prevent extraction.

    Args:
        text: Raw text that may be, or may contain, a JSON object.

    Returns:
        The first JSON object found as a ``dict``, or ``None`` when the text
        is empty, not a string, or contains no parseable JSON object.
    """
    if not isinstance(text, str) or not text:
        return None

    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        parsed = None
    if isinstance(parsed, dict):
        return parsed

    decoder = json.JSONDecoder()
    for brace in re.finditer(r"\{", text):
        try:
            candidate, _ = decoder.raw_decode(text, brace.start())
        except json.JSONDecodeError:
            continue
        if isinstance(candidate, dict):
            return candidate
    return None


def _build_prompt(resume_text: str, job_description: str) -> str:
    """Assemble the model prompt from the truncated resume and job description."""
    resume = resume_text[:MAX_RESUME_CHARS]
    jd = (job_description or "")[:MAX_JOB_DESCRIPTION_CHARS]
    # Interpolate the instructions as a value: their literal JSON braces would
    # break ``str.format`` if they were used as the format template.
    return (
        f"{_PROMPT_INSTRUCTIONS}\n\n"
        f"Resume:\n{resume}\n\n"
        f"Job Description:\n{jd}\n"
        "[/INST]"
    )


def _api_error_message(response) -> str:
    """Return the ``error`` field of a failed API response, if it has one."""
    try:
        return response.json().get("error", "Unknown API error")
    except (ValueError, AttributeError):
        # Body is not JSON, or is JSON but not an object.
        return "Unknown API error"


def analyze_with_huggingface(file, job_description: str) -> Dict[str, Any]:
    """Analyze a resume against a job description via the Hugging Face API.

    Args:
        file: Uploaded resume; see ``process_uploaded_file`` for accepted
            forms.
        job_description: Job description text; ``None`` is treated as empty.

    Returns:
        A dict in one of these shapes:

        * ``{"structured_result": dict, "raw_response": str}`` when the model
          returned JSON containing ``analysis`` and ``overall_score``.
        * ``{"raw_response": str, "error": str}`` when the model responded
          but no valid JSON with the required keys could be extracted.
        * ``{"error": str}`` (optionally with ``"api_response"``) for input,
          configuration, network, or API failures.

        This function does not raise; every failure is reported through the
        ``error`` key so the UI can display it.
    """
    try:
        # Process file and validate inputs
        resume_text = process_uploaded_file(file)
        if not resume_text.strip():
            return {"error": "Extracted resume text is empty"}

        api_key = os.getenv("HUGGINGFACE_API_KEY")
        if not api_key:
            return {"error": "Hugging Face API key not configured"}

        prompt = _build_prompt(resume_text, job_description)

        # Make API request
        response = requests.post(
            HF_API_URL,
            headers={"Authorization": f"Bearer {api_key}"},
            json={
                "inputs": prompt,
                "parameters": {
                    "max_new_tokens": 800,
                    "temperature": 0.3,
                    "do_sample": False,
                    # Without this the API echoes the prompt (including its
                    # JSON template) in front of the generated text, which
                    # defeats JSON extraction.
                    "return_full_text": False,
                },
            },
            timeout=REQUEST_TIMEOUT_SECONDS,
        )

        # Handle API errors
        if response.status_code != 200:
            return {
                "error": f"API request failed: {_api_error_message(response)}",
                "api_response": response.text,
            }

        # Process response
        try:
            generated_text = response.json()[0]["generated_text"]
        except (ValueError, LookupError, TypeError):
            return {
                "error": "Unexpected API response format",
                "api_response": response.text,
            }

        # Try to extract JSON and validate its structure
        result = extract_json_from_text(generated_text)
        required_keys = {"analysis", "overall_score"}
        if result and required_keys <= result.keys():
            return {
                "structured_result": result,
                "raw_response": generated_text,
            }

        # If JSON extraction failed, return raw output
        return {
            "raw_response": generated_text,
            "error": "Could not extract valid JSON",
        }
    except requests.exceptions.RequestException as e:
        return {"error": f"Network error: {e}"}
    except Exception as e:
        # Top-level boundary for the UI: report instead of crashing the app.
        return {"error": f"Analysis failed: {e}"}


# Enhanced Gradio Interface
with gr.Blocks(title="ATS Resume Analyzer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""# ATS Resume Analyzer""")

    with gr.Row():
        with gr.Column():
            file_input = gr.File(
                label="Upload Resume (PDF or DOCX)",
                file_types=[".pdf", ".docx"],
                type="filepath",
            )
            jd_input = gr.Textbox(
                label="Job Description",
                lines=8,
                placeholder="Paste the job description here...",
            )
            analyze_btn = gr.Button("Analyze", variant="primary")

        with gr.Column():
            with gr.Tabs() as output_tabs:
                with gr.Tab("Analysis Results"):
                    json_output = gr.JSON(label="Structured Analysis")
                    summary_output = gr.Textbox(
                        label="Summary", interactive=False
                    )
                with gr.Tab("API Response"):
                    raw_output = gr.Textbox(label="Raw API Response", lines=10)
                with gr.Tab("Debug Info"):
                    status_output = gr.Textbox(label="Status Info", lines=5)
            status = gr.Textbox(label="Status", interactive=False)

    def display_results(file, job_description):
        """Run the analysis and map its result onto the output components.

        Args:
            file: Value of ``file_input`` (the uploaded resume).
            job_description: Value of ``jd_input``.

        Returns:
            A dict keyed by output component, giving each component's new
            value.
        """
        result = analyze_with_huggingface(file, job_description)

        # Start from a cleared UI so stale values from a previous run never
        # linger next to a new status message.
        output = {
            json_output: None,
            summary_output: None,
            raw_output: None,
            status_output: None,
            status: "",
        }

        if "error" in result:
            output.update({
                status: f"Error: {result['error']}",
                status_output: str(result),
                raw_output: (
                    result.get("api_response")
                    or result.get("raw_response")
                    or ""
                ),
            })
        elif "structured_result" in result:
            structured = result["structured_result"]
            output.update({
                json_output: structured["analysis"],
                summary_output: structured.get("summary", ""),
                raw_output: result["raw_response"],
                status: "Analysis complete!",
                status_output: "Successfully parsed JSON response",
            })
        else:
            # Defensive fallback for a result with neither key.
            output.update({
                raw_output: result.get("raw_response", "No response received"),
                status: "Received non-JSON response",
                status_output: (
                    "The API returned a response but it couldn't be parsed "
                    "as JSON"
                ),
            })

        return output

    analyze_btn.click(
        fn=display_results,
        inputs=[file_input, jd_input],
        outputs=[json_output, summary_output, raw_output, status_output, status],
    )


if __name__ == "__main__":
    # NOTE(review): ``allowed_paths`` lists paths Gradio may serve; "*" is
    # presumably meant as a wildcard -- confirm it is needed at all, since
    # this app never serves files from disk.
    demo.queue().launch(
        allowed_paths=["*"],
    )