Spaces:
Sleeping
Sleeping
Update parser_logic.py
Browse files- parser_logic.py +15 -19
parser_logic.py
CHANGED
|
@@ -4,6 +4,7 @@ import re
|
|
| 4 |
import logging
|
| 5 |
import fitz # PyMuPDF
|
| 6 |
from google import genai
|
|
|
|
| 7 |
from dotenv import load_dotenv
|
| 8 |
|
| 9 |
# Configure Logging
|
|
@@ -15,10 +16,10 @@ load_dotenv()
|
|
| 15 |
# Secure Configuration
|
| 16 |
api_key = os.getenv("GEMINI_API_KEY")
|
| 17 |
if not api_key:
|
| 18 |
-
logger.error("GEMINI_API_KEY not found
|
| 19 |
raise ValueError("GEMINI_API_KEY is missing.")
|
| 20 |
|
| 21 |
-
#
|
| 22 |
client = genai.Client(api_key=api_key)
|
| 23 |
|
| 24 |
def extract_text_from_stream(file_bytes: bytes) -> str:
|
|
@@ -30,40 +31,35 @@ def extract_text_from_stream(file_bytes: bytes) -> str:
|
|
| 30 |
text += page.get_text()
|
| 31 |
except Exception as e:
|
| 32 |
logger.error(f"PDF Extraction Error: {e}")
|
| 33 |
-
raise ValueError("Failed to extract text from PDF.
|
| 34 |
return text
|
| 35 |
|
| 36 |
def parse_resume_with_ai(resume_text: str) -> dict:
|
| 37 |
"""Uses GenAI to transform unstructured text into JSON."""
|
| 38 |
|
| 39 |
prompt = f"""
|
| 40 |
-
|
| 41 |
-
- name (
|
| 42 |
-
|
| 43 |
-
- phone (string)
|
| 44 |
-
- skills (array of strings)
|
| 45 |
-
- summary (string, max 2 sentences)
|
| 46 |
-
|
| 47 |
-
If a field is not found, return null or an empty list.
|
| 48 |
-
Return strictly valid JSON. Do not include markdown formatting.
|
| 49 |
|
| 50 |
Resume Text:
|
| 51 |
{resume_text[:10000]}
|
| 52 |
"""
|
| 53 |
|
| 54 |
try:
|
| 55 |
-
# NEW SDK
|
| 56 |
response = client.models.generate_content(
|
| 57 |
model="gemini-1.5-flash",
|
| 58 |
-
contents=prompt
|
|
|
|
|
|
|
|
|
|
| 59 |
)
|
| 60 |
|
| 61 |
-
#
|
| 62 |
-
|
| 63 |
-
clean_json = re.sub(r'```json\s*|```', '', raw_output, flags=re.MULTILINE).strip()
|
| 64 |
-
|
| 65 |
-
return json.loads(clean_json)
|
| 66 |
|
| 67 |
except Exception as e:
|
| 68 |
logger.error(f"AI Processing Error: {e}")
|
|
|
|
| 69 |
return {"error": f"AI Processing failed: {str(e)}"}
|
|
|
|
| 4 |
import logging
|
| 5 |
import fitz # PyMuPDF
|
| 6 |
from google import genai
|
| 7 |
+
from google.genai import types
|
| 8 |
from dotenv import load_dotenv
|
| 9 |
|
| 10 |
# Configure Logging
|
|
|
|
| 16 |
# Secure Configuration
|
| 17 |
api_key = os.getenv("GEMINI_API_KEY")
|
| 18 |
if not api_key:
|
| 19 |
+
logger.error("GEMINI_API_KEY not found.")
|
| 20 |
raise ValueError("GEMINI_API_KEY is missing.")
|
| 21 |
|
| 22 |
+
# Initialize the NEW Client
|
| 23 |
client = genai.Client(api_key=api_key)
|
| 24 |
|
| 25 |
def extract_text_from_stream(file_bytes: bytes) -> str:
|
|
|
|
| 31 |
text += page.get_text()
|
| 32 |
except Exception as e:
|
| 33 |
logger.error(f"PDF Extraction Error: {e}")
|
| 34 |
+
raise ValueError("Failed to extract text from PDF.")
|
| 35 |
return text
|
| 36 |
|
| 37 |
def parse_resume_with_ai(resume_text: str) -> dict:
|
| 38 |
"""Uses GenAI to transform unstructured text into JSON."""
|
| 39 |
|
| 40 |
prompt = f"""
|
| 41 |
+
Extract the following data from this resume text:
|
| 42 |
+
- name, email, phone, skills (list), and summary.
|
| 43 |
+
Return strictly valid JSON.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
Resume Text:
|
| 46 |
{resume_text[:10000]}
|
| 47 |
"""
|
| 48 |
|
| 49 |
try:
|
| 50 |
+
# NEW SDK METHOD
|
| 51 |
response = client.models.generate_content(
|
| 52 |
model="gemini-1.5-flash",
|
| 53 |
+
contents=prompt,
|
| 54 |
+
config=types.GenerateContentConfig(
|
| 55 |
+
response_mime_type="application/json"
|
| 56 |
+
)
|
| 57 |
)
|
| 58 |
|
| 59 |
+
# Parse the JSON response directly
|
| 60 |
+
return json.loads(response.text)
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
except Exception as e:
|
| 63 |
logger.error(f"AI Processing Error: {e}")
|
| 64 |
+
# Fallback to plain text error if JSON fails
|
| 65 |
return {"error": f"AI Processing failed: {str(e)}"}
|