LovnishVerma committed on
Commit
505be46
·
verified ·
1 Parent(s): 9281337

Update parser_logic.py

Browse files
Files changed (1) hide show
  1. parser_logic.py +50 -25
parser_logic.py CHANGED
@@ -5,20 +5,32 @@ import fitz # PyMuPDF
5
  from google import genai
6
  from google.genai import types
7
  from dotenv import load_dotenv
 
 
8
 
 
9
  logging.basicConfig(level=logging.INFO)
10
  logger = logging.getLogger(__name__)
11
 
12
  load_dotenv()
13
 
 
14
  api_key = os.getenv("GEMINI_API_KEY")
15
  if not api_key:
16
- raise ValueError("GEMINI_API_KEY is missing from Secrets.")
17
 
18
  client = genai.Client(api_key=api_key)
19
 
 
 
 
 
 
 
 
20
 
21
  def extract_text_from_stream(file_bytes: bytes) -> str:
 
22
  text = ""
23
  try:
24
  with fitz.open(stream=file_bytes, filetype="pdf") as doc:
@@ -29,32 +41,45 @@ def extract_text_from_stream(file_bytes: bytes) -> str:
29
  raise ValueError("Failed to extract text from PDF.")
30
  return text
31
 
32
-
33
  def parse_resume_with_ai(resume_text: str) -> dict:
34
- prompt = """
35
- Extract the following information from the resume text below.
36
- Return STRICTLY valid JSON with these fields:
37
-
38
- {
39
- "name": "",
40
- "email": "",
41
- "phone": "",
42
- "skills": [],
43
- "summary": ""
44
- }
45
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
- try:
48
- response = client.models.generate_content(
49
- model="gemini-pro", # FIXED MODEL
50
- contents=prompt + "\n\n" + resume_text[:10000],
51
- config=types.GenerateContentConfig(
52
- response_mime_type="application/json"
 
 
 
 
 
53
  )
54
- )
55
-
56
- return json.loads(response.text)
 
 
 
 
 
 
 
57
 
58
- except Exception as e:
59
- logger.error(f"AI Processing Error: {e}")
60
- return {"error": f"AI Processing failed: {str(e)}"}
 
5
  from google import genai
6
  from google.genai import types
7
  from dotenv import load_dotenv
8
+ from pydantic import BaseModel, Field
9
+ from typing import List, Optional
10
 
11
+ # Configure Logging
12
  logging.basicConfig(level=logging.INFO)
13
  logger = logging.getLogger(__name__)
14
 
15
  load_dotenv()
16
 
17
+ # Secure Configuration
18
  api_key = os.getenv("GEMINI_API_KEY")
19
  if not api_key:
20
+ raise ValueError("GEMINI_API_KEY is missing.")
21
 
22
  client = genai.Client(api_key=api_key)
23
 
24
+ # --- 1. Define Strict Schema (Production Best Practice) ---
25
class ResumeSchema(BaseModel):
    # Shape of the structured data requested for a parsed resume.
    # Scalar fields default to None when absent; `skills` defaults to a
    # fresh empty list per instance via `default_factory`.
    # NOTE: kept as `#` comments on purpose -- a class docstring would be
    # emitted as the model's schema description.
    name: Optional[str] = Field(
        default=None,
        description="Candidate's full name",
    )
    email: Optional[str] = Field(
        default=None,
        description="Email address",
    )
    phone: Optional[str] = Field(
        default=None,
        description="Phone number",
    )
    skills: List[str] = Field(
        default_factory=list,
        description="List of technical skills",
    )
    summary: Optional[str] = Field(
        default=None,
        description="Brief professional summary",
    )
31
 
32
  def extract_text_from_stream(file_bytes: bytes) -> str:
33
+ """Extracts raw text content from PDF bytes directly in memory."""
34
  text = ""
35
  try:
36
  with fitz.open(stream=file_bytes, filetype="pdf") as doc:
 
41
  raise ValueError("Failed to extract text from PDF.")
42
  return text
43
 
 
44
def parse_resume_with_ai(resume_text: str) -> dict:
    """Extract structured resume data from text, falling back across models.

    Each model in the preference list is tried in order. The first response
    whose body is non-empty and decodes to a JSON object is returned. Only
    the first 10,000 characters of ``resume_text`` are sent with the prompt.

    Args:
        resume_text: Plain text of the resume.

    Returns:
        The decoded JSON object on success. If every model fails, a dict
        with a single ``"error"`` key describing the last failure. Failures
        inside the loop (request errors, empty bodies, invalid JSON) are
        logged and reported through that dict rather than raised.
    """
    prompt = """
    Extract structured data from this resume.
    Return strictly valid JSON matching the requested schema.
    """

    # Define models to try in order of preference
    # 1. Flash (Fast, Cheap)
    # 2. Pro (Older, but highly stable on v1beta)
    models_to_try = ["gemini-1.5-flash", "gemini-1.5-pro", "gemini-pro"]

    last_exception = None

    for model_name in models_to_try:
        try:
            logger.info(f"Attempting to parse using model: {model_name}")

            response = client.models.generate_content(
                model=model_name,
                contents=prompt + "\n\n" + resume_text[:10000],
                config=types.GenerateContentConfig(
                    response_mime_type="application/json",
                    response_schema=ResumeSchema,  # Pydantic schema enforcement
                ),
            )

            # An empty body is a failure for this model: raise so the reason
            # is logged and recorded instead of silently moving on (which
            # previously could end in "Root cause: None").
            if not response.text:
                raise ValueError("empty response body")

            data = json.loads(response.text)

            # The declared return type is a dict; reject any other JSON
            # value (list, string, number) and try the next model.
            if not isinstance(data, dict):
                raise ValueError(
                    f"expected a JSON object, got {type(data).__name__}"
                )

            return data

        except Exception as e:
            # Broad catch is deliberate: any failure moves on to the next
            # model in the list.
            logger.warning(f"Model {model_name} failed: {e}")
            last_exception = e

    # If all models fail, return the error
    logger.error("All models failed to process the resume.")
    return {"error": f"Processing failed. Root cause: {str(last_exception)}"}