Spaces:

LovnishVerma
/

ResumeDataExtractor

Running

App Files Files Community

LovnishVerma committed on 8 days ago

Commit

505be46

verified ·

1 Parent(s): 9281337

Update parser_logic.py

Browse files

Files changed (1) hide show

parser_logic.py +50 -25

parser_logic.py CHANGED Viewed

@@ -5,20 +5,32 @@ import fitz  # PyMuPDF
 from google import genai
 from google.genai import types
 from dotenv import load_dotenv
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 load_dotenv()
 api_key = os.getenv("GEMINI_API_KEY")
 if not api_key:
-    raise ValueError("GEMINI_API_KEY is missing from Secrets.")
 client = genai.Client(api_key=api_key)
 def extract_text_from_stream(file_bytes: bytes) -> str:
     text = ""
     try:
         with fitz.open(stream=file_bytes, filetype="pdf") as doc:
@@ -29,32 +41,45 @@ def extract_text_from_stream(file_bytes: bytes) -> str:
         raise ValueError("Failed to extract text from PDF.")
     return text
 def parse_resume_with_ai(resume_text: str) -> dict:
-    prompt = """
-    Extract the following information from the resume text below.
-    Return STRICTLY valid JSON with these fields:
-    {
-      "name": "",
-      "email": "",
-      "phone": "",
-      "skills": [],
-      "summary": ""
-    }
     """
-    try:
-        response = client.models.generate_content(
-            model="gemini-pro",  # ✅ FIXED MODEL
-            contents=prompt + "\n\n" + resume_text[:10000],
-            config=types.GenerateContentConfig(
-                response_mime_type="application/json"
             )
-        )
-        return json.loads(response.text)
-    except Exception as e:
-        logger.error(f"AI Processing Error: {e}")
-        return {"error": f"AI Processing failed: {str(e)}"}

 from google import genai
 from google.genai import types
 from dotenv import load_dotenv
+from pydantic import BaseModel, Field
+from typing import List, Optional
# Configure logging: INFO level on the root logger, plus a module-scoped logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Runs before the os.getenv lookup below; presumably so a local .env file can
# supply GEMINI_API_KEY during development -- confirm against deployment setup.
load_dotenv()
# Secure configuration: the key is read from the environment, never hard-coded.
api_key = os.getenv("GEMINI_API_KEY")
# Fail fast at import time so a missing key surfaces immediately rather than
# on the first parse request.
if not api_key:
    raise ValueError("GEMINI_API_KEY is missing.")
# Module-level client used by parse_resume_with_ai.
client = genai.Client(api_key=api_key)
+# --- 1. Define Strict Schema (Production Best Practice) ---
class ResumeSchema(BaseModel):
    # Shape of the JSON object requested from the model (passed as
    # ``response_schema`` in parse_resume_with_ai).
    #
    # NOTE: documented with comments instead of a class docstring on purpose:
    # Pydantic copies a model's docstring into the generated JSON schema, which
    # would alter the schema handed to the model.

    # Scalar fields default to None so a resume lacking one is still valid.
    name: Optional[str] = Field(
        default=None,
        description="Candidate's full name",
    )
    email: Optional[str] = Field(
        default=None,
        description="Email address",
    )
    phone: Optional[str] = Field(
        default=None,
        description="Phone number",
    )
    # default_factory gives every instance its own fresh list.
    skills: List[str] = Field(
        default_factory=list,
        description="List of technical skills",
    )
    summary: Optional[str] = Field(
        default=None,
        description="Brief professional summary",
    )
 def extract_text_from_stream(file_bytes: bytes) -> str:
+    """Extracts raw text content from PDF bytes directly in memory."""
     text = ""
     try:
         with fitz.open(stream=file_bytes, filetype="pdf") as doc:
         raise ValueError("Failed to extract text from PDF.")
     return text
 def parse_resume_with_ai(resume_text: str) -> dict:
     """
+    Production-grade parser with Model Fallback and Schema Validation.
+    """
+    prompt = """
+    Extract structured data from this resume.
+    Return strictly valid JSON matching the requested schema.
+    """
+    # Define models to try in order of preference
+    # 1. Flash (Fast, Cheap)
+    # 2. Pro (Older, but highly stable on v1beta)
+    models_to_try = ["gemini-1.5-flash", "gemini-1.5-pro", "gemini-pro"]
+    last_exception = None
+    for model_name in models_to_try:
+        try:
+            logger.info(f"Attempting to parse using model: {model_name}")
+            response = client.models.generate_content(
+                model=model_name,
+                contents=prompt + "\n\n" + resume_text[:10000],
+                config=types.GenerateContentConfig(
+                    response_mime_type="application/json",
+                    response_schema=ResumeSchema # Pydantic schema enforcement
+                )
             )
+            # If successful, parse and return
+            if response.text:
+                data = json.loads(response.text)
+                return data
+        except Exception as e:
+            logger.warning(f"Model {model_name} failed: {e}")
+            last_exception = e
+            # Continue to the next model in the list...
+    # If all models fail, return the error
+    logger.error("All models failed to process the resume.")
+    return {"error": f"Processing failed. Root cause: {str(last_exception)}"}