Spaces:

LovnishVerma
/

ResumeDataExtractor

Running

App Files Community

LovnishVerma committed 4 days ago

Commit

97cdcdc

verified ·

1 Parent(s): c4ce8da

Update parser_logic.py

Browse files

Files changed (1) hide show

parser_logic.py +68 -70

parser_logic.py CHANGED Viewed

@@ -1,71 +1,69 @@
-import os
-import json
-import re
-import logging
-import fitz  # PyMuPDF
-import google.generativeai as genai
-from dotenv import load_dotenv
-# Configure Logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-load_dotenv()
-# Secure Configuration
-api_key = os.getenv("GEMINI_API_KEY")
-if not api_key:
-    logger.error("GEMINI_API_KEY not found in environment variables.")
-    raise ValueError("GEMINI_API_KEY is missing.")
-genai.configure(api_key=api_key)
-model = genai.GenerativeModel('gemini-1.5-flash')
-def extract_text_from_stream(file_bytes: bytes) -> str:
-    """Extracts raw text content from PDF bytes directly in memory."""
-    text = ""
-    try:
-        # stream=file_bytes tells PyMuPDF to read from memory, not disk
-        with fitz.open(stream=file_bytes, filetype="pdf") as doc:
-            for page in doc:
-                text += page.get_text()
-    except Exception as e:
-        logger.error(f"PDF Extraction Error: {e}")
-        raise ValueError("Failed to extract text from PDF. File may be corrupted.")
-    return text
-def parse_resume_with_ai(resume_text: str) -> dict:
-    """Uses GenAI to transform unstructured text into JSON."""
-    # Prompt Engineering: Added instructions for "null" values to keep schema consistent
-    prompt = f"""
-    Acting as an expert recruiter, extract the following data from this resume text:
-    - name (string)
-    - email (string)
-    - phone (string)
-    - skills (array of strings)
-    - summary (string, max 2 sentences)
-    If a field is not found, return null or an empty list.
-    Return strictly valid JSON. Do not include markdown formatting.
-    Resume Text:
-    {resume_text[:10000]}
-    """
-    # Truncate text to 10k chars to avoid token limits if user uploads a book
-    try:
-        response = model.generate_content(prompt)
-        # Robust Cleaning: Remove Markdown, newlines, and non-json text
-        raw_output = response.text.strip()
-        # Remove ```json and ``` identifiers if present
-        clean_json = re.sub(r'```json\s*|```', '', raw_output, flags=re.MULTILINE).strip()
-        return json.loads(clean_json)
-    except json.JSONDecodeError as e:
-        logger.error(f"JSON Decode Error. Raw AI Output: {response.text}")
-        return {"error": "AI response was not valid JSON", "raw_output": response.text}
-    except Exception as e:
-        logger.error(f"AI Processing Error: {e}")
         return {"error": f"AI Processing failed: {str(e)}"}

+import os
+import json
+import re
+import logging
+import fitz  # PyMuPDF
+from google import genai
+from dotenv import load_dotenv
+# Configure Logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+load_dotenv()
+# Secure Configuration
+api_key = os.getenv("GEMINI_API_KEY")
+if not api_key:
+    logger.error("GEMINI_API_KEY not found in environment variables.")
+    raise ValueError("GEMINI_API_KEY is missing.")
+# NEW SDK INITIALIZATION
+client = genai.Client(api_key=api_key)
+def extract_text_from_stream(file_bytes: bytes) -> str:
+    """Extracts raw text content from PDF bytes directly in memory."""
+    text = ""
+    try:
+        with fitz.open(stream=file_bytes, filetype="pdf") as doc:
+            for page in doc:
+                text += page.get_text()
+    except Exception as e:
+        logger.error(f"PDF Extraction Error: {e}")
+        raise ValueError("Failed to extract text from PDF. File may be corrupted.")
+    return text
+def parse_resume_with_ai(resume_text: str) -> dict:
+    """Uses GenAI to transform unstructured text into JSON."""
+    prompt = f"""
+    Acting as an expert recruiter, extract the following data from this resume text:
+    - name (string)
+    - email (string)
+    - phone (string)
+    - skills (array of strings)
+    - summary (string, max 2 sentences)
+    If a field is not found, return null or an empty list.
+    Return strictly valid JSON. Do not include markdown formatting.
+    Resume Text:
+    {resume_text[:10000]}
+    """
+    try:
+        # NEW SDK CALL
+        response = client.models.generate_content(
+            model="gemini-1.5-flash",
+            contents=prompt
+        )
+        # Robust Cleaning
+        raw_output = response.text.strip()
+        clean_json = re.sub(r'```json\s*|```', '', raw_output, flags=re.MULTILINE).strip()
+        return json.loads(clean_json)
+    except Exception as e:
+        logger.error(f"AI Processing Error: {e}")
         return {"error": f"AI Processing failed: {str(e)}"}