"""Resume Analyzer API.

Flask service that scores uploaded candidate resumes against a job
description. Uses a zero-shot transformer classifier when the optional
`transformers` package is importable; otherwise falls back to a
keyword/skill heuristic.
"""

from flask import Flask, request, jsonify, send_from_directory
from flask_cors import CORS
import contextlib
import os
from werkzeug.utils import secure_filename
import PyPDF2
import docx
import re
import numpy as np
from typing import List, Dict, Any
import uuid
import logging
from logging.handlers import RotatingFileHandler

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = Flask(__name__)
CORS(app)

# Configuration
UPLOAD_FOLDER = os.path.join("/tmp", "uploads")
ALLOWED_EXTENSIONS = {'txt', 'pdf', 'doc', 'docx'}
MAX_FILE_SIZE = 16 * 1024 * 1024  # 16MB

# Must be set before `transformers` is imported so model downloads/caches
# land in a writable location (e.g. Hugging Face Spaces containers).
os.environ["HF_HOME"] = "/tmp/hf_home"

app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config['MAX_CONTENT_LENGTH'] = MAX_FILE_SIZE

# Create upload directory if it doesn't exist
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Try to load AI models (optional) -- the service degrades gracefully to
# keyword heuristics when transformers is missing or the model fails to load.
ai_models_loaded = False
classifier = None
try:
    from transformers import pipeline

    classifier = pipeline(
        "zero-shot-classification",
        # model="facebook/bart-large-mnli",
        model="valhalla/distilbart-mnli-12-1",  # lighter than bart-large-mnli
        device=-1,  # CPU
        framework="pt",
    )
    ai_models_loaded = True
    logger.info("AI models loaded successfully (using distilbart-mnli-12-1)")
except ImportError:
    logger.warning("Transformers not installed, using fallback methods")
except Exception as e:
    logger.error(f"Error loading AI models: {e}, using fallback")


def allowed_file(filename):
    """Return True if *filename* has an extension in ALLOWED_EXTENSIONS."""
    return '.' in filename and \
        filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS


def extract_text_from_file(file_path, filename):
    """Extract plain text from a PDF, DOC/DOCX, or TXT file.

    *filename* is used only to pick the parser by extension (compared
    case-insensitively, so `Resume.PDF` works).  Raises Exception when the
    file cannot be parsed or yields no text.
    """
    text = ""
    # Lowercase once so uppercase extensions (accepted by allowed_file)
    # don't silently fall through every branch below.
    lower_name = filename.lower()

    if lower_name.endswith('.pdf'):
        try:
            with open(file_path, 'rb') as f:
                pdf_reader = PyPDF2.PdfReader(f)
                pages = []
                for page in pdf_reader.pages:
                    page_text = page.extract_text()
                    if page_text:
                        pages.append(page_text)
                text = "\n".join(pages) + ("\n" if pages else "")
        except Exception as e:
            logger.error(f"Error reading PDF: {e}")
            raise Exception(f"Failed to extract text from PDF: {e}")
    elif lower_name.endswith(('.doc', '.docx')):
        try:
            doc = docx.Document(file_path)
            paragraphs = [p.text for p in doc.paragraphs if p.text]
            text = "\n".join(paragraphs) + ("\n" if paragraphs else "")
        except Exception as e:
            logger.error(f"Error reading DOCX: {e}")
            raise Exception(f"Failed to extract text from DOCX: {e}")
    elif lower_name.endswith('.txt'):
        try:
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                text = f.read()
        except Exception as e:
            logger.error(f"Error reading TXT: {e}")
            raise Exception(f"Failed to extract text from TXT: {e}")

    if not text.strip():
        raise Exception("No text could be extracted from the file")

    # Collapse all whitespace runs to single spaces.
    text = re.sub(r'\s+', ' ', text).strip()
    return text


# Comprehensive skills list with improved matching
COMMON_SKILLS = [
    'python', 'java', 'javascript', 'typescript', 'react', 'angular', 'vue',
    'node.js', 'express', 'django', 'flask', 'spring', 'laravel', 'ruby',
    'php', 'html', 'css', 'sass', 'less', 'bootstrap', 'tailwind',
    'sql', 'mysql', 'postgresql', 'mongodb', 'redis', 'oracle',
    'aws', 'azure', 'google cloud', 'gcp', 'docker', 'kubernetes', 'jenkins',
    'git', 'github', 'gitlab', 'ci/cd', 'devops',
    'machine learning', 'ml', 'ai', 'deep learning', 'tensorflow', 'pytorch',
    'keras', 'scikit-learn', 'data analysis', 'pandas', 'numpy', 'r',
    'tableau', 'power bi', 'excel',
    'agile', 'scrum', 'kanban', 'project management',
    'rest api', 'graphql', 'microservices', 'api development',
    'c++', 'c#', 'net', 'swift', 'kotlin', 'go', 'rust',
]

# Pre-compiled per-skill patterns (hoisted so they are built once, not per
# call).  Lookarounds are used instead of \b because \b after a non-word
# character ('c++', 'c#') would require a *word* character to follow, so
# "c++ " in a resume would never match with the naive \b...\b form.
_SKILL_PATTERNS = [
    (skill, re.compile(r'(?<!\w)' + re.escape(skill) + r'(?!\w)'))
    for skill in COMMON_SKILLS
]


def extract_skills(text):
    """Return a list of recognized skill names found in *text*.

    Matching is case-insensitive and anchored at word edges to avoid
    false positives (e.g. 'go' inside 'google').  Results are title-cased.
    """
    text_lower = text.lower()
    found_skills = {
        skill.title()
        for skill, pattern in _SKILL_PATTERNS
        if pattern.search(text_lower)
    }
    return list(found_skills)


def calculate_score(job_description, candidate_text, skills):
    """Score a candidate 0-100 against *job_description*.

    Uses the zero-shot classifier when loaded; otherwise (or on any model
    error) falls back to calculate_fallback_score.
    """
    if classifier and ai_models_loaded:
        try:
            # Model input is truncated; long resumes exceed the model limit.
            sequence_to_classify = candidate_text[:512]

            # More specific labels for better classification
            candidate_labels = [
                "highly relevant candidate for the job",
                "somewhat relevant candidate",
                "irrelevant candidate for this position",
            ]
            result = classifier(sequence_to_classify, candidate_labels)

            # Weight the scores (highest for most relevant)
            relevance_score = (result['scores'][0] * 0.7 +
                               result['scores'][1] * 0.3) * 100

            # Skills matching: 5 points per skill, capped at 100.
            if skills:
                skill_match_score = min(100, len(skills) * 5)
            else:
                skill_match_score = 30

            # Combine scores (weighted average)
            final_score = (relevance_score * 0.7) + (skill_match_score * 0.3)
            return min(100, max(0, int(final_score)))
        except Exception as e:
            logger.error(f"Error in AI scoring: {e}, using fallback")

    # Fallback scoring method
    return calculate_fallback_score(job_description, candidate_text, skills)


def calculate_fallback_score(job_description, candidate_text, skills):
    """Heuristic 0-100 score used when no AI model is available.

    Combines keyword overlap with the job description, a skills bonus,
    and small bonuses for experience-indicating words.
    """
    score = 40  # Lower base score

    job_lower = job_description.lower()
    candidate_lower = candidate_text.lower()

    # Extract meaningful words (4+ characters)
    job_words = set(re.findall(r'\b[a-z]{4,}\b', job_lower))
    candidate_words = set(re.findall(r'\b[a-z]{4,}\b', candidate_lower))

    # Remove common stop words
    stop_words = {'with', 'this', 'that', 'have', 'from', 'they',
                  'which', 'were', 'their'}
    job_words = job_words - stop_words
    candidate_words = candidate_words - stop_words

    common_words = job_words & candidate_words
    if job_words:
        keyword_match = len(common_words) / len(job_words) * 40
        score += min(40, keyword_match)

    # Skills bonus
    if skills:
        score += min(20, len(skills) * 3)

    # Experience indicators with context
    experience_indicators = [
        'experience', 'years', 'worked', 'developed', 'created',
        'built', 'managed', 'led', 'implemented', 'designed',
    ]
    for indicator in experience_indicators:
        if re.search(r'\b' + indicator + r'\b', candidate_lower):
            score += 2

    return min(100, max(0, int(score)))


def extract_candidate_info(text, filename):
    """Extract (name, email, phone) from resume *text*.

    Falls back to the filename stem for the name and placeholder strings
    when email/phone patterns do not match.
    """
    # Candidate name: try several patterns, first match wins.
    name_patterns = [
        r'(?:^|\n)[\s]*([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)[\s]*(?:\n|$)',
        r'Resume[\s\S]{0,500}?([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)',
        r'Name[:]?[\s]*([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)',
    ]
    name = filename.split('.')[0]  # Default to filename
    for pattern in name_patterns:
        name_match = re.search(pattern, text, re.IGNORECASE)
        if name_match:
            name = name_match.group(1).strip()
            break

    # Extract email
    email_match = re.search(
        r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', text)
    email = email_match.group(0) if email_match else "No email found"

    # Phone: several international formats, first match wins.
    phone_patterns = [
        r'(\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}',
        r'(\+?\d{1,3}[-.\s]?)?\(?\d{2}\)?[-.\s]?\d{4}[-.\s]?\d{4}',
        r'(\+?\d{1,3}[-.\s]?)?\(?\d{4}\)?[-.\s]?\d{3}[-.\s]?\d{3}',
    ]
    phone = "No phone found"
    for pattern in phone_patterns:
        phone_match = re.search(pattern, text)
        if phone_match:
            phone = phone_match.group(0)
            break

    return name, email, phone


def analyze_candidate(job_description, candidate_text, filename):
    """Build the result dict for one candidate (never raises).

    On any internal error, returns a zero-score record carrying the error
    message so one bad resume cannot fail the whole batch.
    """
    try:
        skills = extract_skills(candidate_text)
        score = calculate_score(job_description, candidate_text, skills)
        name, email, phone = extract_candidate_info(candidate_text, filename)
        return {
            'id': str(uuid.uuid4()),
            'name': name,
            'email': email,
            'phone': phone,
            'skills': skills,
            'score': score,
            'text_preview': candidate_text[:200] + '...'
                            if len(candidate_text) > 200 else candidate_text,
        }
    except Exception as e:
        logger.error(f"Error analyzing candidate: {e}")
        return {
            'id': str(uuid.uuid4()),
            'name': filename.split('.')[0],
            'email': "Error in extraction",
            'phone': "Error in extraction",
            'skills': [],
            'score': 0,
            'text_preview': "Error processing file",
            'error': str(e),
        }


@app.route('/api/process-resumes', methods=['POST'])
def process_resumes():
    """Process uploaded resumes against a job description.

    Expects multipart form data with a 'jobDescription' file and one or
    more 'resumes' files.  Returns candidates sorted by score (desc).
    """
    try:
        # Check if files are present
        if 'resumes' not in request.files:
            return jsonify({'error': 'Missing resume files'}), 400
        if 'jobDescription' not in request.files:
            return jsonify({'error': 'Missing job description file'}), 400

        job_desc_file = request.files['jobDescription']
        resume_files = request.files.getlist('resumes')

        # Validate job description file
        if job_desc_file.filename == '':
            return jsonify({'error': 'No job description file selected'}), 400
        if not allowed_file(job_desc_file.filename):
            return jsonify({'error': 'Invalid job description file type'}), 400

        # Validate resume files
        valid_resumes = [f for f in resume_files
                         if f.filename != '' and allowed_file(f.filename)]
        if not valid_resumes:
            return jsonify({'error': 'No valid resume files'}), 400

        # Save and process the job description.  A uuid prefix keeps
        # concurrent requests from clobbering each other's files in the
        # shared upload folder; a finally block guarantees cleanup even
        # when extraction fails (the original leaked the file then).
        job_desc_filename = secure_filename(job_desc_file.filename)
        job_desc_path = os.path.join(
            app.config['UPLOAD_FOLDER'],
            f"{uuid.uuid4().hex}_{job_desc_filename}")
        job_desc_file.save(job_desc_path)
        try:
            try:
                job_description = extract_text_from_file(
                    job_desc_path, job_desc_filename)
            except Exception as e:
                return jsonify(
                    {'error': f'Failed to process job description: {str(e)}'}
                ), 400

            # Process each resume
            candidates = []
            for resume_file in valid_resumes:
                resume_filename = secure_filename(resume_file.filename)
                resume_path = os.path.join(
                    app.config['UPLOAD_FOLDER'],
                    f"{uuid.uuid4().hex}_{resume_filename}")
                resume_file.save(resume_path)
                try:
                    resume_text = extract_text_from_file(
                        resume_path, resume_filename)
                    candidates.append(analyze_candidate(
                        job_description, resume_text, resume_filename))
                except Exception as e:
                    logger.error(f"Error processing {resume_filename}: {e}")
                    candidates.append({
                        'id': str(uuid.uuid4()),
                        'name': resume_filename.split('.')[0],
                        'email': "Processing error",
                        'phone': "Processing error",
                        'skills': [],
                        'score': 0,
                        'text_preview': f"Error: {str(e)}",
                        'error': str(e),
                    })
                finally:
                    # Best-effort cleanup; missing file is not an error.
                    with contextlib.suppress(OSError):
                        os.remove(resume_path)
        finally:
            with contextlib.suppress(OSError):
                os.remove(job_desc_path)

        # Sort candidates by score
        candidates.sort(key=lambda x: x['score'], reverse=True)

        return jsonify({
            'candidates': candidates,
            'job_description': job_description[:500] + '...'
                               if len(job_description) > 500
                               else job_description,
            'total_processed': len(candidates),
            'ai_used': ai_models_loaded,
        })
    except Exception as e:
        logger.error(f"Error processing resumes: {e}")
        return jsonify({'error': 'Internal server error'}), 500


@app.route('/api/health', methods=['GET'])
def health_check():
    """Health check endpoint"""
    return jsonify({
        'status': 'healthy',
        'ai_models_loaded': ai_models_loaded,
        'upload_folder_exists': os.path.exists(UPLOAD_FOLDER),
    })


@app.route('/')
def index():
    return jsonify({'message': 'Resume Analyzer API is running'})


if __name__ == "__main__":
    port = int(os.environ.get("PORT", 10000))
    app.run(host="0.0.0.0", port=port, debug=False)