Spaces:

Mangesh223
/

DefendModel

Sleeping

App Files Files Community

Mangesh223 committed on Apr 1, 2025

Commit

b5f8089

verified ·

1 Parent(s): 3027706

Update app.py

Browse files

Files changed (1) hide show

app.py +155 -45

app.py CHANGED Viewed

@@ -4,82 +4,192 @@ import PyPDF2
 import docx
 import requests
 import json
-# Text extraction functions (unchanged)
-def extract_text_from_pdf(file):
-    pdf_reader = PyPDF2.PdfReader(file)
-    return " ".join(page.extract_text() for page in pdf_reader.pages)
-def extract_text_from_docx(file):
-    doc = docx.Document(file)
-    return "\n".join(para.text for para in doc.paragraphs)
-def process_uploaded_file(file):
-    if file.name.lower().endswith(".pdf"):
         return extract_text_from_pdf(file)
-    elif file.name.lower().endswith(".docx"):
         return extract_text_from_docx(file)
-    raise ValueError("Unsupported file format")
-# Updated Hugging Face API function
-def analyze_with_huggingface(file, job_description):
     try:
         resume_text = process_uploaded_file(file)
         HF_API_KEY = os.getenv("HUGGINGFACE_API_KEY")
-        API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3"
         prompt = """<s>[INST] <<SYS>>
-You are an ATS resume analyzer. Return ONLY this JSON format:
 {
-  "ATS Parameters": {
-    "Keywords": {"Match": 0-100, "Recommendation": []},
-    "Formatting": {"Match": 0-100, "Recommendation": []},
-    "Skills": {"Match": 0-100, "Recommendation": []},
-    "Experience": {"Match": 0-100, "Recommendation": []},
-    "Education": {"Match": 0-100, "Recommendation": []}
   },
-  "Score": {
-    "Overall": 0-100,
-    "Details": {"Keywords": 0-100, "Formatting": 0-100, "Skills": 0-100, "Experience": 0-100, "Education": 0-100}
-  }
 }
 <</SYS>>
 Resume: {resume}
 Job Description: {jd}
-[/INST]""".format(resume=resume_text[:3000], jd=job_description[:1000])
         response = requests.post(
-            API_URL,
             headers={"Authorization": f"Bearer {HF_API_KEY}"},
             json={
                 "inputs": prompt,
-                "parameters": {"max_new_tokens": 800, "temperature": 0.7}
             },
-            timeout=30
         )
-        # Extract JSON from response
-        result = response.json()[0]['generated_text']
-        json_str = result[result.find('{'):result.rfind('}')+1]
-        return json.loads(json_str)
     except Exception as e:
-        return {"error": str(e)}
-# Gradio Interface
-with gr.Blocks() as demo:
-    gr.Markdown("## Resume Analyzer")
-    with gr.Row():
-        file_input = gr.File(label="Upload Resume", file_types=[".pdf", ".docx"])
-        jd_input = gr.Textbox(label="Job Description", lines=10)
-    analyze_btn = gr.Button("Analyze")
-    output = gr.JSON()
     analyze_btn.click(
-        fn=analyze_with_huggingface,
         inputs=[file_input, jd_input],
-        outputs=output
     )
-demo.queue().launch(allowed_paths=["*"])

 import docx
 import requests
 import json
+import re
+from typing import Union, Dict, Any
+# Text extraction functions
+def extract_text_from_pdf(file) -> str:
+    """Extract text from PDF file"""
+    try:
+        pdf_reader = PyPDF2.PdfReader(file)
+        return " ".join(page.extract_text() or "" for page in pdf_reader.pages)
+    except Exception as e:
+        raise ValueError(f"PDF extraction failed: {str(e)}")
+def extract_text_from_docx(file) -> str:
+    """Extract text from Word document"""
+    try:
+        doc = docx.Document(file)
+        return "\n".join(para.text for para in doc.paragraphs if para.text)
+    except Exception as e:
+        raise ValueError(f"DOCX extraction failed: {str(e)}")
+def process_uploaded_file(file) -> str:
+    """Process uploaded file based on its type"""
+    if not file.name:
+        raise ValueError("No filename provided")
+    filename = file.name.lower()
+    if filename.endswith(".pdf"):
         return extract_text_from_pdf(file)
+    elif filename.endswith(".docx"):
         return extract_text_from_docx(file)
+    raise ValueError("Unsupported file format. Please upload PDF or Word document.")
+def extract_json_from_text(text: str) -> Union[Dict[str, Any], None]:
+    """Extract JSON from text response with robust error handling"""
     try:
+        # First try parsing the entire text as JSON
+        return json.loads(text)
+    except json.JSONDecodeError:
+        # If that fails, try to find JSON within the text
+        json_match = re.search(r'\{[\s\S]*\}', text)
+        if json_match:
+            try:
+                return json.loads(json_match.group())
+            except json.JSONDecodeError:
+                pass
+    return None
+def analyze_with_huggingface(file, job_description: str) -> Dict[str, Any]:
+    """Analyze resume using Hugging Face API with enhanced error handling"""
+    try:
+        # Process file and validate inputs
         resume_text = process_uploaded_file(file)
+        if not resume_text.strip():
+            return {"error": "Extracted resume text is empty"}
         HF_API_KEY = os.getenv("HUGGINGFACE_API_KEY")
+        if not HF_API_KEY:
+            return {"error": "Hugging Face API key not configured"}
+        # Prepare the prompt with strict JSON instructions
         prompt = """<s>[INST] <<SYS>>
+You are an ATS resume analyzer. Return ONLY valid JSON with this exact structure:
 {
+  "analysis": {
+    "keywords": {"score": 0-100, "recommendations": []},
+    "formatting": {"score": 0-100, "recommendations": []},
+    "skills": {"score": 0-100, "recommendations": []},
+    "experience": {"score": 0-100, "recommendations": []},
+    "education": {"score": 0-100, "recommendations": []}
   },
+  "overall_score": 0-100,
+  "summary": "Brief overall assessment"
 }
+Important:
+1. Generate actual scores based on content
+2. Provide specific recommendations
+3. Return ONLY the JSON object
+4. No additional text or explanations
 <</SYS>>
 Resume: {resume}
 Job Description: {jd}
+[/INST]""".format(
+            resume=resume_text[:3000],  # Truncate to prevent token limit issues
+            jd=job_description[:1000]
+        )
+        # Make API request
         response = requests.post(
+            "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3",
             headers={"Authorization": f"Bearer {HF_API_KEY}"},
             json={
                 "inputs": prompt,
+                "parameters": {
+                    "max_new_tokens": 800,
+                    "temperature": 0.3,  # Lower for more consistent JSON
+                    "do_sample": False   # Disable randomness
+                }
             },
+            timeout=45  # Increased timeout
         )
+        # Handle API errors
+        if response.status_code != 200:
+            error_msg = response.json().get("error", "Unknown API error")
+            return {"error": f"API request failed: {error_msg}"}
+        # Process response
+        raw_output = response.json()[0]['generated_text']
+        result = extract_json_from_text(raw_output)
+        if not result:
+            return {
+                "error": "Could not extract valid JSON",
+                "raw_response": raw_output  # Include raw response for debugging
+            }
+        # Validate JSON structure
+        required_keys = {"analysis", "overall_score"}
+        if not all(key in result for key in required_keys):
+            return {
+                "error": "Incomplete analysis in response",
+                "partial_response": result
+            }
+        return result
+    except requests.exceptions.RequestException as e:
+        return {"error": f"Network error: {str(e)}"}
     except Exception as e:
+        return {"error": f"Analysis failed: {str(e)}"}
+# Enhanced Gradio Interface
+with gr.Blocks(title="ATS Resume Analyzer", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("""# ATS Resume Analyzer
+    Upload your resume and job description to get ATS compatibility analysis""")
+    with gr.Row():
+        with gr.Column():
+            file_input = gr.File(
+                label="Upload Resume (PDF or DOCX)",
+                file_types=[".pdf", ".docx"],
+                type="filepath"
+            )
+            jd_input = gr.Textbox(
+                label="Job Description",
+                lines=8,
+                placeholder="Paste the job description here..."
+            )
+            analyze_btn = gr.Button("Analyze", variant="primary")
+        with gr.Column():
+            output_tabs = gr.Tabs()
+            with output_tabs:
+                with gr.Tab("Structured Analysis", id="json"):
+                    json_output = gr.JSON(label="Analysis Results")
+                with gr.Tab("Raw Response", id="raw"):
+                    raw_output = gr.Textbox(label="API Response", interactive=False)
+            status = gr.Textbox(label="Status", interactive=False)
+    def display_results(file, job_description):
+        """Handle results display with proper error handling"""
+        result = analyze_with_huggingface(file, job_description)
+        if "error" in result:
+            return {
+                json_output: None,
+                raw_output: result.get("raw_response", str(result)),
+                status: f"Error: {result['error']}"
+            }
+        return {
+            json_output: result,
+            raw_output: json.dumps(result, indent=2),
+            status: "Analysis complete!"
+        }
     analyze_btn.click(
+        fn=display_results,
         inputs=[file_input, jd_input],
+        outputs=[json_output, raw_output, status]
     )
+# Launch with request queueing enabled on port 7860
+if __name__ == "__main__":
+    demo.queue(concurrency_count=3).launch(
+        allowed_paths=["*"],
+        server_port=7860
+    )