Spaces:

LovnishVerma
/

ResumeDataExtractor

Sleeping

App Files Files Community

LovnishVerma committed on Dec 30, 2025

Commit

c924798

verified ·

1 Parent(s): 97cdcdc

Update parser_logic.py

Browse files

Files changed (1) hide show

parser_logic.py +15 -19

parser_logic.py CHANGED Viewed

@@ -4,6 +4,7 @@ import re
 import logging
 import fitz  # PyMuPDF
 from google import genai
 from dotenv import load_dotenv
 # Configure Logging
@@ -15,10 +16,10 @@ load_dotenv()
 # Secure Configuration
 api_key = os.getenv("GEMINI_API_KEY")
 if not api_key:
-    logger.error("GEMINI_API_KEY not found in environment variables.")
     raise ValueError("GEMINI_API_KEY is missing.")
-# NEW SDK INITIALIZATION
 client = genai.Client(api_key=api_key)
 def extract_text_from_stream(file_bytes: bytes) -> str:
@@ -30,40 +31,35 @@ def extract_text_from_stream(file_bytes: bytes) -> str:
                 text += page.get_text()
     except Exception as e:
         logger.error(f"PDF Extraction Error: {e}")
-        raise ValueError("Failed to extract text from PDF. File may be corrupted.")
     return text
 def parse_resume_with_ai(resume_text: str) -> dict:
     """Uses GenAI to transform unstructured text into JSON."""
     prompt = f"""
-    Acting as an expert recruiter, extract the following data from this resume text:
-    - name (string)
-    - email (string)
-    - phone (string)
-    - skills (array of strings)
-    - summary (string, max 2 sentences)
-    If a field is not found, return null or an empty list.
-    Return strictly valid JSON. Do not include markdown formatting.
     Resume Text:
     {resume_text[:10000]}
     """
     try:
-        # NEW SDK CALL
         response = client.models.generate_content(
             model="gemini-1.5-flash",
-            contents=prompt
         )
-        # Robust Cleaning
-        raw_output = response.text.strip()
-        clean_json = re.sub(r'```json\s*|```', '', raw_output, flags=re.MULTILINE).strip()
-        return json.loads(clean_json)
     except Exception as e:
         logger.error(f"AI Processing Error: {e}")
         return {"error": f"AI Processing failed: {str(e)}"}

 import logging
 import fitz  # PyMuPDF
 from google import genai
+from google.genai import types
 from dotenv import load_dotenv
 # Configure Logging
 # Secure Configuration
 api_key = os.getenv("GEMINI_API_KEY")
 if not api_key:
+    logger.error("GEMINI_API_KEY not found.")
     raise ValueError("GEMINI_API_KEY is missing.")
+# Initialize the NEW Client
 client = genai.Client(api_key=api_key)
 def extract_text_from_stream(file_bytes: bytes) -> str:
                 text += page.get_text()
     except Exception as e:
         logger.error(f"PDF Extraction Error: {e}")
+        raise ValueError("Failed to extract text from PDF.")
     return text
 def parse_resume_with_ai(resume_text: str) -> dict:
     """Uses GenAI to transform unstructured text into JSON."""
     prompt = f"""
+    Extract the following data from this resume text:
+    - name, email, phone, skills (list), and summary.
+    Return strictly valid JSON.
     Resume Text:
     {resume_text[:10000]}
     """
     try:
+        # NEW SDK METHOD
         response = client.models.generate_content(
             model="gemini-1.5-flash",
+            contents=prompt,
+            config=types.GenerateContentConfig(
+                response_mime_type="application/json"
+            )
         )
+        # Parse the JSON response directly
+        return json.loads(response.text)
     except Exception as e:
         logger.error(f"AI Processing Error: {e}")
+        # Fallback to plain text error if JSON fails
         return {"error": f"AI Processing failed: {str(e)}"}