import os import io import base64 import json import re from dotenv import load_dotenv import streamlit as st import fitz # PyMuPDF import google.generativeai as genai from PIL import Image # Load environment variables load_dotenv() # Configure Google API Key api_key = os.getenv("GOOGLE_API_KEY") genai.configure(api_key=api_key) def get_gemini_response(pdf_content, prompt): model = genai.GenerativeModel('gemini-1.5-pro') response = model.generate_content([pdf_content[0], prompt]) return response.text def input_pdf_setup(uploaded_file): if uploaded_file is not None: try: # Open the uploaded file as a PDF document pdf_document = fitz.open(stream=uploaded_file.read(), filetype="pdf") # Extract the first page of the PDF first_page = pdf_document[0] # Convert the first page to an image pix = first_page.get_pixmap() # Create a BytesIO object to save the image img_byte_arr = io.BytesIO() # Convert the pixmap to an image using PIL img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples) # Save the image as JPEG in memory img.save(img_byte_arr, format='JPEG') img_byte_arr = img_byte_arr.getvalue() # Encode the image to base64 format pdf_parts = [ { "mime_type": "image/jpeg", "data": base64.b64encode(img_byte_arr).decode() } ] return pdf_parts except Exception as e: st.error(f"Error processing PDF: {e}") return None else: raise FileNotFoundError("No file uploaded") def clean_and_format_json(response_text): # Extract JSON content json_match = re.search(r'```json\s*(.*?)\s*```', response_text, re.DOTALL) if json_match: json_str = json_match.group(1) # Remove any trailing commas before closing brackets or braces json_str = re.sub(r',\s*([}\]])', r'\1', json_str) return json_str return None # Streamlit App st.set_page_config(page_title="ATS Resume Expert") st.header("ATS Resume Parser") uploaded_file = st.file_uploader("Upload your resume (PDF)...", type=["pdf"]) if uploaded_file is not None: st.write("PDF Uploaded Successfully") submit = st.button("Parse Resume to JSON") if submit: if uploaded_file is not None: pdf_content = input_pdf_setup(uploaded_file) if pdf_content: input_prompt = """ Parse the content of the resume and convert it into a JSON format. The JSON should include the following fields: - Name - Contact Information (phone number, email, address) - Summary/Objective - Skills - Experience (company, position, start date, end date, responsibilities) - Education (institution, degree, start date, end date) - Certifications - Projects (name, description, technologies used) - Languages - Hobbies/Interests Provide the output in valid JSON format, enclosed in triple backticks with 'json' specified, like this: ```json { "key": "value" } ``` Ensure all JSON is properly formatted and there's no additional text outside the JSON. """ response_text = get_gemini_response(pdf_content, input_prompt) # Clean and format the response cleaned_json_str = clean_and_format_json(response_text) if cleaned_json_str: try: response_json = json.loads(cleaned_json_str) st.subheader("Parsed Resume in JSON") st.json(response_json) except json.JSONDecodeError as e: st.error(f"Error parsing JSON. Please try again.") else: st.error("Unable to extract JSON from the response. Please try again.") else: st.write("Please upload a valid resume") else: st.write("Please upload the resume")