File size: 4,241 Bytes
5fc4e12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2f4f982
5fc4e12
2f4f982
 
 
 
 
5fc4e12
 
2f4f982
5fc4e12
2f4f982
 
5fc4e12
2f4f982
 
5fc4e12
 
 
2f4f982
5fc4e12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2f4f982
 
5fc4e12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2f4f982
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import os
import io
import base64
import json
import re
from dotenv import load_dotenv
import streamlit as st
import fitz  # PyMuPDF
import google.generativeai as genai
from PIL import Image

# Populate the process environment via python-dotenv before reading settings
load_dotenv()

# Look up the Google API key (None when unset) and pass it to the Gemini SDK
api_key = os.environ.get("GOOGLE_API_KEY")
genai.configure(api_key=api_key)

def get_gemini_response(pdf_content, prompt, model_name='gemini-1.5-pro'):
    """Send one image part plus a text prompt to Gemini and return the reply text.

    Args:
        pdf_content: Indexable collection of image parts, such as the list
            built by ``input_pdf_setup``. Only element 0 is sent.
        prompt: Instruction text placed after the image in the request.
        model_name: Identifier passed to ``genai.GenerativeModel``. Defaults
            to the model the script originally hard-coded, so existing
            two-argument calls behave as before.

    Returns:
        The ``text`` attribute of the object returned by ``generate_content``.
    """
    model = genai.GenerativeModel(model_name)
    # The request is a two-item list: the first rendered page, then the prompt.
    response = model.generate_content([pdf_content[0], prompt])
    return response.text

def input_pdf_setup(uploaded_file):
    """Render the first page of an uploaded PDF as a base64-encoded JPEG part.

    Args:
        uploaded_file: Object exposing ``read()`` that returns the PDF bytes,
            or ``None`` when nothing was uploaded.

    Returns:
        A one-element list ``[{"mime_type": "image/jpeg", "data": <str>}]``
        where ``data`` is the base64 text of the JPEG, or ``None`` when
        processing fails (the error is reported through ``st.error``).

    Raises:
        FileNotFoundError: If ``uploaded_file`` is ``None``.
    """
    if uploaded_file is None:
        raise FileNotFoundError("No file uploaded")

    try:
        # Open the document as a context manager so it is closed even when
        # page access or rendering raises.
        with fitz.open(stream=uploaded_file.read(), filetype="pdf") as pdf_document:
            # Only the first page is rendered; any further pages are ignored.
            first_page = pdf_document[0]
            pix = first_page.get_pixmap()

            # NOTE(review): "RGB" assumes the default pixmap has three
            # channels and no alpha -- confirm if get_pixmap() ever gets
            # non-default arguments.
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)

        # Encode the rendered page as JPEG entirely in memory.
        jpeg_buffer = io.BytesIO()
        img.save(jpeg_buffer, format='JPEG')
        jpeg_bytes = jpeg_buffer.getvalue()

        # Wrap the base64 text in the mime_type/data dict the caller forwards.
        pdf_parts = [
            {
                "mime_type": "image/jpeg",
                "data": base64.b64encode(jpeg_bytes).decode()
            }
        ]
        return pdf_parts
    except Exception as e:
        # UI boundary: show the problem to the user instead of a traceback.
        st.error(f"Error processing PDF: {e}")
        return None

def clean_and_format_json(response_text):
    """Extract the JSON payload from a model reply and drop trailing commas.

    The payload is taken from the first ```` ```json ... ``` ```` fenced block.
    If there is no such block but the stripped reply itself starts with
    ``{`` or ``[``, the whole reply is treated as the payload.

    Args:
        response_text: Raw reply text; may be empty or ``None``.

    Returns:
        The payload string with commas that directly precede ``}`` or ``]``
        removed, or ``None`` when no JSON payload can be located. The result
        is not validated; callers still need ``json.loads``.
    """
    if not response_text:
        return None

    fenced = re.search(r'```json\s*(.*?)\s*```', response_text, re.DOTALL)
    if fenced:
        json_str = fenced.group(1)
    else:
        # Fallback for replies that contain bare JSON without a code fence.
        json_str = response_text.strip()
        if not json_str.startswith(('{', '[')):
            return None

    # The first alternative consumes whole string literals (including escaped
    # quotes) and puts them back unchanged, so a ", ]" or ", }" inside a
    # string value is never rewritten. Only commas outside strings that are
    # followed by a closing bracket or brace are removed.
    return re.sub(
        r'("(?:\\.|[^"\\])*")|,\s*(?=[}\]])',
        lambda m: m.group(1) or '',
        json_str,
    )

# Streamlit App
# Page flow: upload a PDF, press the button, render the first page, ask Gemini
# for a JSON rendition of the resume, then display the parsed result.
st.set_page_config(page_title="ATS Resume Expert")
st.header("ATS Resume Parser")

uploaded_file = st.file_uploader("Upload your resume (PDF)...", type=["pdf"])

if uploaded_file is not None:
    st.write("PDF Uploaded Successfully")

submit = st.button("Parse Resume to JSON")

if submit:
    if uploaded_file is not None:
        pdf_content = input_pdf_setup(uploaded_file)
        if pdf_content:
            input_prompt = """
            Parse the content of the resume and convert it into a JSON format. The JSON should include the following fields:

            - Name
            - Contact Information (phone number, email, address)
            - Summary/Objective
            - Skills
            - Experience (company, position, start date, end date, responsibilities)
            - Education (institution, degree, start date, end date)
            - Certifications
            - Projects (name, description, technologies used)
            - Languages
            - Hobbies/Interests

            Provide the output in valid JSON format, enclosed in triple backticks with 'json' specified, like this:
            ```json
            {
                "key": "value"
            }
            ```
            Ensure all JSON is properly formatted and there's no additional text outside the JSON.
            """
            try:
                response_text = get_gemini_response(pdf_content, input_prompt)
            except Exception as e:
                # UI boundary: report a failed model call in the page rather
                # than letting the script die with a traceback.
                st.error(f"Error getting response from Gemini: {e}")
                response_text = None

            if response_text is not None:
                # Clean and format the response
                cleaned_json_str = clean_and_format_json(response_text)

                if cleaned_json_str:
                    try:
                        response_json = json.loads(cleaned_json_str)
                    except json.JSONDecodeError as e:
                        # Show the decoder's message so the failure is diagnosable.
                        st.error(f"Error parsing JSON. Please try again. Details: {e}")
                    else:
                        st.subheader("Parsed Resume in JSON")
                        st.json(response_json)
                else:
                    st.error("Unable to extract JSON from the response. Please try again.")
        else:
            st.write("Please upload a valid resume")
    else:
        st.write("Please upload the resume")