Spaces:

rairo
/

recruitment-api

Sleeping

File size: 6,759 Bytes

bcfae1d
 
a9d1468
bcfae1d
4f0837d
 
a9d1468
 
bcfae1d
 
4f0837d
 
 
bcfae1d
 
 
4f0837d
 
 
 
bcfae1d
 
4f0837d
 
 
 
 
 
 
bcfae1d
 
4f0837d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bcfae1d
447acee
a9d1468
447acee
 
 
 
 
 
a9d1468
 
447acee
a9d1468
 
 
447acee
a9d1468
 
 
 
 
 
 
 
 
 
 
 
 
4f0837d
a9d1468
4f0837d
 
 
 
bcfae1d
a9d1468
 
 
 
 
 
 
 
447acee
a9d1468
 
 
bcfae1d
a9d1468
 
 
 
447acee
a9d1468
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
447acee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a9d1468
 
447acee
a9d1468
 
 
 
 
447acee
a9d1468
 
 
 
4f0837d
 
 
bcfae1d
4f0837d
 
bcfae1d
 
4f0837d

from flask import Flask, request, jsonify
import re
import os
import json
import tempfile
import logging
import google.generativeai as genai
import pypdf
from flask_cors import CORS

# Configure the root logger once at import time; every logging.* call in this
# module goes through it at INFO level and above.
logging.basicConfig(level=logging.INFO)

# Flask application object; the route defined below is registered on it.
app = Flask(__name__)
CORS(app)  # Enable CORS for all routes

# The Gemini API key is read from the environment variable literally named
# "Gemini". A missing or empty value aborts the import with ValueError, so the
# service never starts without credentials.
api_key = os.getenv('Gemini')
if not api_key:
    raise ValueError("Gemini API key not found in environment variables")

def configure_gemini(api_key):
    """Register the API key with Gemini and return a generative model.

    Calls ``genai.configure`` with ``api_key`` and then builds a
    ``GenerativeModel`` for ``gemini-2.0-flash-exp``. Any exception raised
    along the way is logged and re-raised unchanged.
    """
    try:
        genai.configure(api_key=api_key)
        gemini_model = genai.GenerativeModel('gemini-2.0-flash-exp')
    except Exception as e:
        logging.error(f"Error configuring Gemini: {str(e)}")
        raise
    else:
        return gemini_model

def read_pdf(file_path):
    """Return the text of the PDF stored at ``file_path``.

    Pages are read in order with ``pypdf``. Pages whose extracted text is
    empty are skipped, and the remaining page texts are joined with newlines.
    Any exception is logged and re-raised unchanged.
    """
    try:
        with open(file_path, 'rb') as pdf_handle:
            reader = pypdf.PdfReader(pdf_handle)
            # Extraction has to happen while the file is still open.
            page_texts = [
                chunk
                for chunk in (page.extract_text() for page in reader.pages)
                if chunk
            ]
        return "\n".join(page_texts)
    except Exception as e:
        logging.error(f"Error reading PDF: {str(e)}")
        raise

def extract_json_from_response(response_text):
    """Extract the first valid JSON object from Gemini's response.

    The model is asked for bare JSON but may wrap it in Markdown code fences
    or surround it with commentary. Every ``{`` in the text is tried, in
    order, as the start of a JSON object; the first one that decodes is
    returned and whatever follows it is ignored. This avoids the failure of a
    greedy first-brace-to-last-brace match when the reply contains a stray
    ``}`` or a second object after the real payload.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        The decoded JSON object as a ``dict``.

    Raises:
        ValueError: If the text contains no ``{`` at all.
        json.JSONDecodeError: If braces are present but no position decodes
            to a JSON object (this is a ``ValueError`` subclass).
        TypeError: If ``response_text`` is not a string.
    """
    decoder = json.JSONDecoder()
    try:
        first_error = None
        for brace in re.finditer(r'\{', response_text):
            try:
                # raw_decode parses one JSON value starting at the given
                # index and tolerates trailing text (closing fences, notes).
                parsed, _end = decoder.raw_decode(response_text, brace.start())
            except json.JSONDecodeError as e:
                # Keep the earliest failure: it points at the most likely
                # intended payload and gives the most useful message.
                if first_error is None:
                    first_error = e
                continue
            return parsed

        if first_error is not None:
            raise first_error
        raise ValueError("No valid JSON found in response")
    except json.JSONDecodeError as e:
        logging.error(f"Error parsing JSON: {str(e)}")
        raise
    except Exception as e:
        logging.error(f"Unexpected error extracting JSON: {str(e)}")
        raise

def process_with_gemini(model, prompt, text, answers_text=None):
    """Ask Gemini to assess an applicant against a job description.

    ``prompt`` carries the job description and ``text`` the combined applicant
    documents. When ``answers_text`` is truthy it is appended as the
    applicant's screening answers. The model's reply text is handed to
    ``extract_json_from_response`` and the parsed result is returned. Any
    exception is logged and re-raised unchanged.
    """
    # Assemble the applicant part of the prompt; the answers are optional.
    applicant_parts = [f"Applicant Documents:\n{text}"]
    if answers_text:
        applicant_parts.append(
            f"Applicant's Answers to Screening Questions:\n{answers_text}"
        )
    applicant_section = "\n\n".join(applicant_parts)

    full_prompt = f"""
You are an expert HR recruiter. Based on the job description and the applicant's documents provided below, assess the applicant's suitability for the job.

Job Description:
{prompt}

{applicant_section}

Provide a detailed analysis that includes:
- A score out of 100 evaluating the applicant's fit for the role.
- A recommendation for hiring: either "hire" or "do not hire".
- A clear justification for the decision.

Return ONLY valid JSON with this exact structure:
{{
    "score": <number>,
    "recommendation": "<hire/do not hire>",
    "justification": "<string>"
}}
"""
    try:
        reply = model.generate_content([full_prompt])
        return extract_json_from_response(reply.text)
    except Exception as e:
        logging.error(f"Error processing text with Gemini: {str(e)}")
        raise

def _format_screening_answers(questions_answers):
    """Turn the raw ``questions_answers`` form field into prompt-ready text.

    Returns ``None`` when the field is missing or empty. A JSON list is
    rendered as numbered ``Q``/``A`` pairs, any other JSON value is
    stringified, and text that is not valid JSON is passed through unchanged.
    """
    if not questions_answers:
        return None

    try:
        qa_data = json.loads(questions_answers)
    except json.JSONDecodeError:
        logging.warning("questions_answers is not valid JSON, treating as plain text")
        return questions_answers

    if not isinstance(qa_data, list):
        return str(qa_data)

    formatted_answers = []
    for idx, qa in enumerate(qa_data, 1):
        if isinstance(qa, dict):
            question = qa.get('question', 'N/A')
            response = qa.get('response', 'N/A')
            formatted_answers.append(f"Q{idx}: {question}\nA{idx}: {response}")
        else:
            # Items that are not objects have no question/response keys; keep
            # their content instead of failing the whole request.
            formatted_answers.append(f"Item {idx}: {qa}")
    return "\n\n".join(formatted_answers)


@app.route('/process-application', methods=['POST'])
def process_application():
    """
    Handle applicant documents and job details.
    Expects:
      - job_id: form data string
      - job_description: form data string
      - applicant_id: form data string
      - questions_answers: (optional) form data string - JSON string containing Q&A pairs
      - files: one or more PDF files (e.g., CV, cover letter)
    Returns a JSON response with score, recommendation, and justification.

    Responds with 400 when a required field is missing, no usable file is
    uploaded, or a file is not a PDF, and with 500 on unexpected failures.
    Temporary copies of the uploads are always removed before returning.
    """
    # Paths of temporary files created for this request; removed in `finally`
    # so that early returns and exceptions cannot leak them.
    temp_paths = []
    try:
        # Get job-related details from the form data
        job_id = request.form.get('job_id')
        job_description = request.form.get('job_description')
        applicant_id = request.form.get('applicant_id')
        questions_answers = request.form.get('questions_answers')  # Optional Q&A payload

        if not job_id or not job_description or not applicant_id:
            return jsonify({'error': 'job_id, job_description, and applicant_id are required'}), 400

        # Ensure at least one file is uploaded
        if 'files' not in request.files:
            return jsonify({'error': 'No files uploaded'}), 400

        files = request.files.getlist('files')
        if not files:
            return jsonify({'error': 'No files provided'}), 400

        # Process each uploaded PDF file
        page_texts = []
        for file in files:
            # File parts with no filename carry no upload; skip them.
            if not file.filename:
                continue
            # Case-insensitive so that names such as "CV.PDF" are accepted.
            if not file.filename.lower().endswith('.pdf'):
                return jsonify({'error': f'File {file.filename} must be a PDF'}), 400

            # Reserve a unique path, then close the handle right away: the
            # upload is written by name, and an open handle would leak a file
            # descriptor (and blocks reopening the file on Windows).
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
                temp_path = temp_file.name
            temp_paths.append(temp_path)

            file.save(temp_path)
            page_texts.append(read_pdf(temp_path))

        if not page_texts:
            # Every part was skipped, so there is nothing to assess.
            return jsonify({'error': 'No files provided'}), 400

        # Each document's text is followed by a blank line.
        combined_text = "".join(text + "\n\n" for text in page_texts)

        # Parse and format questions_answers if provided
        answers_text = _format_screening_answers(questions_answers)

        # Configure Gemini and process combined text
        model = configure_gemini(api_key)
        extracted_data = process_with_gemini(model, job_description, combined_text, answers_text)

        # Log the processing
        logging.info(f"Processed application for applicant_id: {applicant_id} for job_id: {job_id}")

        return jsonify(extracted_data)

    except ValueError as ve:
        # NOTE(review): json.JSONDecodeError is a ValueError, so an unparseable
        # model reply is also reported as a 400 here — confirm this is intended.
        logging.warning(f"Client error: {str(ve)}")
        return jsonify({'error': str(ve)}), 400
    except Exception as e:
        logging.error(f"Internal server error: {str(e)}")
        return jsonify({'error': 'Internal server error'}), 500
    finally:
        # Clean up temporary files; a failed delete must not change the
        # response that was already decided above.
        for temp_path in temp_paths:
            try:
                os.remove(temp_path)
            except OSError as cleanup_error:
                logging.warning(f"Could not remove temporary file {temp_path}: {cleanup_error}")

if __name__ == '__main__':
    # Debug mode enables the interactive Werkzeug debugger, which lets anyone
    # who can reach the server execute arbitrary code. Because the server
    # binds to all interfaces, debug is off unless explicitly requested with
    # FLASK_DEBUG=1 (or true/yes/on) for local development.
    debug_enabled = os.getenv('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    app.run(debug=debug_enabled, host="0.0.0.0", port=7860)