LovnishVerma committed on
Commit
97cdcdc
·
verified ·
1 Parent(s): c4ce8da

Update parser_logic.py

Browse files
Files changed (1) hide show
  1. parser_logic.py +68 -70
parser_logic.py CHANGED
@@ -1,71 +1,69 @@
1
- import os
2
- import json
3
- import re
4
- import logging
5
- import fitz # PyMuPDF
6
- import google.generativeai as genai
7
- from dotenv import load_dotenv
8
-
9
# Configure logging: INFO level on the root logger, plus the module-scoped
# logger that the functions below report errors through.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Runs before the environment lookup below; presumably this pulls variables
# from a local .env file into the process environment (python-dotenv) —
# confirm against the deployment setup.
load_dotenv()

# Secure configuration: the API key comes from the environment rather than
# being hard-coded. Importing this module fails fast with ValueError when the
# variable is unset or empty.
api_key = os.getenv("GEMINI_API_KEY")
if not api_key:
    logger.error("GEMINI_API_KEY not found in environment variables.")
    raise ValueError("GEMINI_API_KEY is missing.")

# Module-level model handle; parse_resume_with_ai sends its prompt through it.
genai.configure(api_key=api_key)
model = genai.GenerativeModel('gemini-1.5-flash')
23
-
24
def extract_text_from_stream(file_bytes: bytes) -> str:
    """Return the text of every page of an in-memory PDF, concatenated.

    Raises:
        ValueError: If the document cannot be opened or its pages read.
    """
    try:
        # Passing stream= makes PyMuPDF parse the bytes in memory rather than
        # opening a path on disk.
        with fitz.open(stream=file_bytes, filetype="pdf") as pdf:
            combined = "".join(page.get_text() for page in pdf)
    except Exception as e:
        logger.error(f"PDF Extraction Error: {e}")
        raise ValueError("Failed to extract text from PDF. File may be corrupted.")
    return combined
36
-
37
def parse_resume_with_ai(resume_text: str) -> dict:
    """Ask the generative model to turn free-form resume text into JSON.

    Returns the decoded JSON reply on success. On failure a dict with an
    "error" key is returned instead (plus "raw_output" when the reply could
    not be decoded as JSON).
    """
    # Only the first 10k characters are sent, to stay clear of token limits
    # when someone uploads a very long document.
    excerpt = resume_text[:10000]

    # The prompt asks for null / an empty list on missing fields so the
    # schema stays consistent from one resume to the next.
    prompt = f"""
    Acting as an expert recruiter, extract the following data from this resume text:
    - name (string)
    - email (string)
    - phone (string)
    - skills (array of strings)
    - summary (string, max 2 sentences)

    If a field is not found, return null or an empty list.
    Return strictly valid JSON. Do not include markdown formatting.

    Resume Text:
    {excerpt}
    """

    try:
        response = model.generate_content(prompt)

        # Trim whitespace, then drop any ```json / ``` fence markers before
        # handing the reply to the JSON decoder.
        reply = response.text.strip()
        unfenced = re.sub(r'```json\s*|```', '', reply, flags=re.MULTILINE)

        return json.loads(unfenced.strip())
    except json.JSONDecodeError:
        logger.error(f"JSON Decode Error. Raw AI Output: {response.text}")
        return {"error": "AI response was not valid JSON", "raw_output": response.text}
    except Exception as e:
        logger.error(f"AI Processing Error: {e}")
        return {"error": f"AI Processing failed: {str(e)}"}
 
1
+ import os
2
+ import json
3
+ import re
4
+ import logging
5
+ import fitz # PyMuPDF
6
+ from google import genai
7
+ from dotenv import load_dotenv
8
+
9
# Configure logging: INFO level on the root logger, plus the module-scoped
# logger that the functions below report errors through.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Runs before the environment lookup below; presumably this pulls variables
# from a local .env file into the process environment (python-dotenv) —
# confirm against the deployment setup.
load_dotenv()

# Secure configuration: the API key comes from the environment rather than
# being hard-coded. Importing this module fails fast with ValueError when the
# variable is unset or empty.
api_key = os.getenv("GEMINI_API_KEY")
if not api_key:
    logger.error("GEMINI_API_KEY not found in environment variables.")
    raise ValueError("GEMINI_API_KEY is missing.")

# New-SDK initialization: a single module-level client created with the key
# above; parse_resume_with_ai issues its request through it.
client = genai.Client(api_key=api_key)
23
+
24
def extract_text_from_stream(file_bytes: bytes) -> str:
    """Extract the raw text of every page from PDF bytes held in memory.

    Args:
        file_bytes: The complete contents of a PDF file.

    Returns:
        The text of all pages, concatenated in page order.

    Raises:
        ValueError: If the document cannot be opened or its pages cannot be
            read. The underlying exception is attached as ``__cause__``.
    """
    try:
        # stream= makes PyMuPDF parse the bytes directly instead of a path.
        with fitz.open(stream=file_bytes, filetype="pdf") as doc:
            # Join once rather than growing a string page by page.
            return "".join(page.get_text() for page in doc)
    except Exception as e:
        logger.error("PDF Extraction Error: %s", e)
        # Chain the cause so the original traceback survives for debugging.
        raise ValueError(
            "Failed to extract text from PDF. File may be corrupted."
        ) from e
35
+
36
def _strip_code_fences(raw_output: str) -> str:
    """Remove one Markdown code fence wrapped around *raw_output*, if any."""
    text = raw_output.strip()
    # Anchored on purpose: only a fence at the very start/end is removed, so
    # backticks that appear inside JSON string values are left intact.
    text = re.sub(r"^```(?:json)?\s*", "", text, flags=re.IGNORECASE)
    text = re.sub(r"\s*```$", "", text)
    return text.strip()


def parse_resume_with_ai(resume_text: str) -> dict:
    """Use the generative model to turn unstructured resume text into a dict.

    Args:
        resume_text: Plain text of the resume. Only the first 10,000
            characters are sent to the model.

    Returns:
        The JSON object decoded from the model's reply. The prompt requests
        the keys ``name``, ``email``, ``phone``, ``skills`` and ``summary``,
        but the reply is not validated against that schema. On any failure a
        dict with an ``"error"`` key is returned instead; when the reply was
        received but is not a usable JSON object it also carries
        ``"raw_output"``. This function does not raise for model or parsing
        failures.
    """
    # Nothing to extract from: avoid a TypeError on None and avoid asking the
    # model to describe an empty resume.
    if not resume_text or not resume_text.strip():
        logger.error("AI Processing Error: resume text is empty.")
        return {"error": "AI Processing failed: resume text is empty"}

    prompt = f"""
    Acting as an expert recruiter, extract the following data from this resume text:
    - name (string)
    - email (string)
    - phone (string)
    - skills (array of strings)
    - summary (string, max 2 sentences)

    If a field is not found, return null or an empty list.
    Return strictly valid JSON. Do not include markdown formatting.

    Resume Text:
    {resume_text[:10000]}
    """

    raw_output = None
    try:
        # NOTE(review): confirm this model id is still served by the API.
        response = client.models.generate_content(
            model="gemini-1.5-flash",
            contents=prompt,
        )

        # The reply may carry no text at all; report that explicitly instead
        # of failing later on None.strip().
        raw_output = response.text
        if not raw_output:
            logger.error("AI Processing Error: model returned no text.")
            return {"error": "AI Processing failed: model returned no text"}

        parsed = json.loads(_strip_code_fences(raw_output))
    except json.JSONDecodeError as e:
        # Kept separate from the generic handler so callers can tell "the
        # model answered, but not with JSON" apart from transport errors and
        # can inspect what it actually said.
        logger.error("JSON Decode Error: %s", e)
        return {"error": "AI response was not valid JSON", "raw_output": raw_output}
    except Exception as e:
        logger.exception("AI Processing Error: %s", e)
        return {"error": f"AI Processing failed: {e}"}

    # Valid JSON that is not an object (e.g. a bare list) would break the
    # declared dict return type.
    if not isinstance(parsed, dict):
        logger.error("AI Processing Error: JSON reply is not an object.")
        return {"error": "AI response was not a JSON object", "raw_output": raw_output}

    return parsed