File size: 6,759 Bytes
bcfae1d
 
a9d1468
bcfae1d
4f0837d
 
a9d1468
 
bcfae1d
 
4f0837d
 
 
bcfae1d
 
 
4f0837d
 
 
 
bcfae1d
 
4f0837d
 
 
 
 
 
 
bcfae1d
 
4f0837d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bcfae1d
447acee
a9d1468
447acee
 
 
 
 
 
a9d1468
 
447acee
a9d1468
 
 
447acee
a9d1468
 
 
 
 
 
 
 
 
 
 
 
 
4f0837d
a9d1468
4f0837d
 
 
 
bcfae1d
a9d1468
 
 
 
 
 
 
 
447acee
a9d1468
 
 
bcfae1d
a9d1468
 
 
 
447acee
a9d1468
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
447acee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a9d1468
 
447acee
a9d1468
 
 
 
 
447acee
a9d1468
 
 
 
4f0837d
 
 
bcfae1d
4f0837d
 
bcfae1d
 
4f0837d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
from flask import Flask, request, jsonify
import re
import os
import json
import tempfile
import logging
import google.generativeai as genai
import pypdf
from flask_cors import CORS

# Root logger emits INFO and above.
logging.basicConfig(level=logging.INFO)

app = Flask(__name__)
# Allow cross-origin requests on every route.
CORS(app)

# The Gemini API key is read from the environment variable named 'Gemini';
# the module refuses to load when it is missing or empty.
api_key = os.environ.get('Gemini')
if api_key is None or api_key == '':
    raise ValueError("Gemini API key not found in environment variables")

def configure_gemini(api_key):
    """Register *api_key* with the Gemini SDK and return a model handle.

    The returned object is a ``genai.GenerativeModel`` for the
    'gemini-2.0-flash-exp' model. Any exception raised during configuration
    is logged and then re-raised unchanged.
    """
    try:
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel('gemini-2.0-flash-exp')
    except Exception as exc:
        logging.error(f"Error configuring Gemini: {str(exc)}")
        raise
    else:
        return model

def read_pdf(file_path):
    """Return the text of every page of the PDF at *file_path*.

    Page texts are joined with a single newline; pages for which
    ``extract_text()`` yields an empty/falsy value are skipped. Any exception
    (missing file, unreadable PDF, ...) is logged and re-raised.
    """
    try:
        with open(file_path, 'rb') as handle:
            reader = pypdf.PdfReader(handle)
            # Extract eagerly while the file is still open.
            page_texts = [page.extract_text() for page in reader.pages]
        return "\n".join(chunk for chunk in page_texts if chunk)
    except Exception as exc:
        logging.error(f"Error reading PDF: {str(exc)}")
        raise

def extract_json_from_response(response_text):
    """Extract the first decodable JSON object from Gemini's response text.

    The text is scanned for ``{`` characters and a JSON decode is attempted
    at each one in order; the first value that decodes is returned. Text
    around the object (Markdown code fences, explanatory prose, even prose
    containing braces) is ignored, and the content of the JSON itself is never
    rewritten.

    Note: if the outermost object is malformed but contains a well-formed
    nested object, that nested object is what gets returned.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        The decoded JSON object (a ``dict``).

    Raises:
        ValueError: If the text contains no ``{`` at all.
        json.JSONDecodeError: (a ``ValueError`` subclass) if ``{`` is present
            but nothing decodable starts at any of them; the error from the
            first attempt is raised.
        TypeError: If *response_text* is not a string.
    """
    decoder = json.JSONDecoder()
    try:
        first_error = None
        for brace in re.finditer(r'\{', response_text):
            try:
                # raw_decode stops at the end of the object, so trailing
                # text after the JSON cannot break parsing.
                parsed, _end = decoder.raw_decode(response_text, brace.start())
            except json.JSONDecodeError as e:
                if first_error is None:
                    first_error = e
                continue
            return parsed

        if first_error is not None:
            raise first_error
        raise ValueError("No valid JSON found in response")
    except json.JSONDecodeError as e:
        logging.error(f"Error parsing JSON: {str(e)}")
        raise
    except Exception as e:
        logging.error(f"Unexpected error extracting JSON: {str(e)}")
        raise

def process_with_gemini(model, prompt, text, answers_text=None):
    """Send combined text (job description and applicant documents) to Gemini and return structured JSON response."""
    
    # Build the applicant section with answers if provided
    applicant_section = f"Applicant Documents:\n{text}"
    if answers_text:
        applicant_section += f"\n\nApplicant's Answers to Screening Questions:\n{answers_text}"
    
    full_prompt = f"""
You are an expert HR recruiter. Based on the job description and the applicant's documents provided below, assess the applicant's suitability for the job.

Job Description:
{prompt}

{applicant_section}

Provide a detailed analysis that includes:
- A score out of 100 evaluating the applicant's fit for the role.
- A recommendation for hiring: either "hire" or "do not hire".
- A clear justification for the decision.

Return ONLY valid JSON with this exact structure:
{{
    "score": <number>,
    "recommendation": "<hire/do not hire>",
    "justification": "<string>"
}}
"""
    try:
        response = model.generate_content([full_prompt])
        return extract_json_from_response(response.text)
    except Exception as e:
        logging.error(f"Error processing text with Gemini: {str(e)}")
        raise

def _format_screening_answers(questions_answers):
    """Turn the raw ``questions_answers`` form field into prompt-ready text.

    A JSON list is rendered as numbered ``Q<n>/A<n>`` pairs (list items that
    are not objects are rendered as a bare ``A<n>`` line instead of failing);
    any other JSON value is stringified; text that is not valid JSON is
    returned unchanged.
    """
    try:
        qa_data = json.loads(questions_answers)
    except json.JSONDecodeError:
        logging.warning("questions_answers is not valid JSON, treating as plain text")
        return questions_answers

    if not isinstance(qa_data, list):
        return str(qa_data)

    formatted_answers = []
    for idx, qa in enumerate(qa_data, 1):
        if isinstance(qa, dict):
            question = qa.get('question', 'N/A')
            response = qa.get('response', 'N/A')
            formatted_answers.append(f"Q{idx}: {question}\nA{idx}: {response}")
        else:
            # Tolerate a plain list of answers rather than returning a 500.
            formatted_answers.append(f"A{idx}: {qa}")
    return "\n\n".join(formatted_answers)


@app.route('/process-application', methods=['POST'])
def process_application():
    """
    Handle applicant documents and job details.
    Expects:
      - job_id: form data string
      - job_description: form data string
      - applicant_id: form data string
      - questions_answers: (optional) form data string - JSON string containing Q&A pairs
      - files: one or more PDF files (e.g., CV, cover letter)
    Returns a JSON response with score, recommendation, and justification.

    Responses:
      - 400 when a required field is missing, no usable file is uploaded,
        a file is not a PDF (extension check, case-insensitive), or a
        ValueError is raised while processing.
      - 500 for any other failure.

    Uploaded files are written to temporary files, which are always removed
    before the request returns, including on error paths.
    """
    temp_files = []
    try:
        # Get job-related details from the form data
        job_id = request.form.get('job_id')
        job_description = request.form.get('job_description')
        applicant_id = request.form.get('applicant_id')
        questions_answers = request.form.get('questions_answers')

        if not job_id or not job_description or not applicant_id:
            return jsonify({'error': 'job_id, job_description, and applicant_id are required'}), 400

        # Ensure at least one file is uploaded
        if 'files' not in request.files:
            return jsonify({'error': 'No files uploaded'}), 400

        files = request.files.getlist('files')
        if not files:
            return jsonify({'error': 'No files provided'}), 400

        # Process each uploaded PDF file
        extracted_texts = []
        for file in files:
            if not file.filename:
                continue
            if not file.filename.lower().endswith('.pdf'):
                return jsonify({'error': f'File {file.filename} must be a PDF'}), 400
            temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
            # Register the path before doing anything that can fail so the
            # finally-clause below always removes it.
            temp_files.append(temp_file.name)
            # Only the reserved name is needed; close the handle so it is not
            # leaked and the path can be reopened on every platform.
            temp_file.close()
            file.save(temp_file.name)
            extracted_texts.append(read_pdf(temp_file.name))

        # Every part had an empty filename: there is nothing to assess.
        if not extracted_texts:
            return jsonify({'error': 'No files provided'}), 400

        combined_text = "".join(f"{text}\n\n" for text in extracted_texts)

        # Parse and format questions_answers if provided
        answers_text = None
        if questions_answers:
            answers_text = _format_screening_answers(questions_answers)

        # Configure Gemini and process combined text
        model = configure_gemini(api_key)
        extracted_data = process_with_gemini(model, job_description, combined_text, answers_text)

        # Log the processing
        logging.info(f"Processed application for applicant_id: {applicant_id} for job_id: {job_id}")

        return jsonify(extracted_data)

    except ValueError as ve:
        logging.warning(f"Client error: {str(ve)}")
        return jsonify({'error': str(ve)}), 400
    except Exception as e:
        # logging.exception keeps the traceback for diagnosis.
        logging.exception(f"Internal server error: {str(e)}")
        return jsonify({'error': 'Internal server error'}), 500
    finally:
        # Clean up temporary files on success, early return and error alike.
        for temp_path in temp_files:
            try:
                os.remove(temp_path)
            except OSError as cleanup_error:
                logging.warning(f"Could not remove temporary file {temp_path}: {cleanup_error}")

if __name__ == '__main__':
    # The server listens on all interfaces, so the Werkzeug interactive
    # debugger must not be on by default: it lets anyone who can reach the
    # port execute arbitrary code. Debug mode is opt-in via FLASK_DEBUG.
    debug_enabled = os.getenv('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    app.run(debug=debug_enabled, host="0.0.0.0", port=7860)