Spaces:

LovnishVerma
/

ResumeDataExtractor

Running

App Files Files Community

LovnishVerma committed 4 days ago

Commit

29fc8f7

verified ·

1 Parent(s): 6dab387

Update parser_logic.py

Browse files

Files changed (1) hide show

parser_logic.py +72 -38

parser_logic.py CHANGED Viewed

@@ -6,66 +6,100 @@ import fitz  # PyMuPDF
 import google.generativeai as genai
 from dotenv import load_dotenv
-# Configure Logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 load_dotenv()
-# Secure Configuration
 api_key = os.getenv("GEMINI_API_KEY")
 if not api_key:
-    logger.error("GEMINI_API_KEY not found in environment variables.")
     raise ValueError("GEMINI_API_KEY is missing.")
 genai.configure(api_key=api_key)
-model = genai.GenerativeModel('gemini-1.5-flash')
 def extract_text_from_stream(file_bytes: bytes) -> str:
-    """Extracts raw text content from PDF bytes directly in memory."""
     text = ""
     try:
-        # stream=file_bytes tells PyMuPDF to read from memory, not disk
         with fitz.open(stream=file_bytes, filetype="pdf") as doc:
             for page in doc:
                 text += page.get_text()
     except Exception as e:
         logger.error(f"PDF Extraction Error: {e}")
-        raise ValueError("Failed to extract text from PDF. File may be corrupted.")
     return text
-def parse_resume_with_ai(resume_text: str) -> dict:
-    """Uses GenAI to transform unstructured text into JSON."""
-    # Prompt Engineering: Added instructions for "null" values to keep schema consistent
-    prompt = f"""
-    Acting as an expert recruiter, extract the following data from this resume text:
-    - name (string)
-    - email (string)
-    - phone (string)
-    - skills (array of strings)
-    - summary (string, max 2 sentences)
-    If a field is not found, return null or an empty list.
-    Return strictly valid JSON. Do not include markdown formatting.
-    Resume Text:
-    {resume_text[:10000]}
-    """
-    # Truncate text to 10k chars to avoid token limits if user uploads a book
-    try:
-        response = model.generate_content(prompt)
-        # Robust Cleaning: Remove Markdown, newlines, and non-json text
-        raw_output = response.text.strip()
-        # Remove ```json and ``` identifiers if present
-        clean_json = re.sub(r'```json\s*|```', '', raw_output, flags=re.MULTILINE).strip()
-        return json.loads(clean_json)
-    except json.JSONDecodeError as e:
-        logger.error(f"JSON Decode Error. Raw AI Output: {response.text}")
-        return {"error": "AI response was not valid JSON", "raw_output": response.text}
-    except Exception as e:
-        logger.error(f"AI Processing Error: {e}")
-        return {"error": f"AI Processing failed: {str(e)}"}

 import google.generativeai as genai
 from dotenv import load_dotenv
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 load_dotenv()
 api_key = os.getenv("GEMINI_API_KEY")
 if not api_key:
     raise ValueError("GEMINI_API_KEY is missing.")
 genai.configure(api_key=api_key)
 def extract_text_from_stream(file_bytes: bytes) -> str:
     text = ""
     try:
         with fitz.open(stream=file_bytes, filetype="pdf") as doc:
             for page in doc:
                 text += page.get_text()
     except Exception as e:
         logger.error(f"PDF Extraction Error: {e}")
+        raise ValueError("Failed to extract text from PDF.")
     return text
+def analyze_resume(resume_text: str, job_description: str = None) -> dict:
+    """
+    Analyzes resume. If JD is provided, performs matching.
+    """
+    # Base prompt (Extraction only)
+    base_instructions = """
+    Extract structured data from the resume.
+    """
+    # Extended prompt (Matching)
+    if job_description:
+        prompt = f"""
+        Act as a strict AI Recruiter. Compare the Resume against the Job Description.
+        RETURN JSON ONLY with this exact structure:
+        {{
+            "candidate": {{
+                "name": "string",
+                "email": "string",
+                "phone": "string",
+                "skills": ["list", "of", "candidate", "skills"],
+                "experience_years": "string or null"
+            }},
+            "match_analysis": {{
+                "score": integer_0_to_100,
+                "reasoning": "brief summary of why this score was given",
+                "matching_skills": ["skills in both resume and JD"],
+                "missing_skills": ["skills in JD but NOT in resume"],
+                "verdict": "Interview" | "Shortlist" | "Reject"
+            }}
+        }}
+        JOB DESCRIPTION:
+        {job_description[:5000]}
+        RESUME TEXT:
+        {resume_text[:10000]}
+        """
+    else:
+        # Fallback to simple extraction if no JD
+        prompt = f"""
+        Extract structured data from the resume. Return JSON:
+        {{
+            "candidate": {{
+                "name": "string",
+                "email": "string",
+                "phone": "string",
+                "skills": ["list", "of", "skills"],
+                "summary": "string"
+            }}
+        }}
+        RESUME TEXT:
+        {resume_text[:10000]}
+        """
+    # Model Strategy: Try Flash first, fallback to Pro
+    models = ['gemini-1.5-flash', 'gemini-pro']
+    for model_name in models:
+        try:
+            model = genai.GenerativeModel(model_name)
+            response = model.generate_content(prompt)
+            # Clean JSON
+            raw = response.text.strip()
+            clean_json = re.sub(r'```json\s*|```', '', raw, flags=re.MULTILINE).strip()
+            return json.loads(clean_json)
+        except Exception as e:
+            logger.warning(f"Model {model_name} failed: {e}")
+            if model_name == models[-1]:
+                return {"error": f"Analysis failed. Detail: {str(e)}"}
+            continue