import sqlite3
import json
import pandas as pd
from datetime import datetime

class ResumeDatabase:
    def __init__(self, db_path='resumes.db'):
        self.db_path = db_path
        self.create_tables()

    def create_tables(self):
        conn = sqlite3.connect(self.db_path)
        c = conn.cursor()
        
        c.execute('''CREATE TABLE IF NOT EXISTS resumes
                    (id INTEGER PRIMARY KEY AUTOINCREMENT,
                     name TEXT,
                     email TEXT,
                     phone TEXT,
                     raw_text TEXT,
                     analysis_json TEXT,
                     created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP)''')
        
        conn.commit()
        conn.close()

    def save_analysis(self, analysis_result, raw_text):
        conn = sqlite3.connect(self.db_path)
        c = conn.cursor()
        
        c.execute('''INSERT INTO resumes (name, email, phone, raw_text, analysis_json)
                    VALUES (?, ?, ?, ?, ?)''',
                    (analysis_result.get('name', 'Not found'),
                     analysis_result.get('email', 'Not found'),
                     analysis_result.get('phone', 'Not found'),
                     raw_text,
                     json.dumps(analysis_result)))
        
        conn.commit()
        conn.close()

    def calculate_score(self, analysis):
        """Calculate a comprehensive score based on resume analysis"""
        try:
            # Initialize scores
            education_score = 0
            experience_score = 0
            technical_score = 0
            project_score = 0
            impact_score = 0
            role_specific_score = 0

            # Education Score (max 20 points)
            edu_level = str(analysis.get('education_level', '')).lower()
            if edu_level:
                if 'phd' in edu_level or 'doctorate' in edu_level:
                    education_score += 20
                elif 'master' in edu_level or 'ms' in edu_level or 'mtech' in edu_level:
                    education_score += 18
                elif 'bachelor' in edu_level or 'bs' in edu_level or 'btech' in edu_level:
                    education_score += 15
                else:
                    education_score += 10

            # Add points for CGPA if available
            cgpa = analysis.get('cgpa', 'Not found')
            if isinstance(cgpa, (int, float)):
                if cgpa >= 3.5:  # Assuming 4.0 scale
                    education_score = min(20, education_score + 2)

            # Experience Score (max 20 points)
            years_exp = analysis.get('years_experience', 0)
            if isinstance(years_exp, (int, float)):
                experience_score = min(20, years_exp * 4)  # 5 years for max score
            elif isinstance(years_exp, str) and years_exp.replace('.', '').isdigit():
                experience_score = min(20, float(years_exp) * 4)

            # Technical Score (max 20 points)
            tech_skills = {
                'programming_languages': analysis.get('programming_languages', []),
                'technical_skills': analysis.get('technical_skills', []),
                'ml_frameworks': analysis.get('ml_frameworks', []),
                'databases': analysis.get('databases', []),
                'cloud_platforms': analysis.get('cloud_platforms', [])
            }
            
            total_skills = sum(len(skills) for skills in tech_skills.values())
            technical_score = min(20, total_skills * 2)

            # Project Score (max 15 points)
            projects = len(analysis.get('projects', []))
            research_exp = 1 if analysis.get('research_experience') else 0
            publications = len(analysis.get('publications', []))
            
            project_score = min(15, projects * 2 + research_exp * 3 + publications * 2)

            # Impact Score (max 15 points)
            leadership = 1 if analysis.get('leadership_experience') else 0
            team_size = analysis.get('team_size', 0)
            if isinstance(team_size, str):
                try:
                    team_size = int(''.join(filter(str.isdigit, team_size)))
                except:
                    team_size = 0
            
            certifications = len(analysis.get('certifications', []))
            awards = len(analysis.get('awards', []))
            
            impact_score = min(15, leadership * 5 + min(5, team_size/2) + min(5, certifications * 2 + awards))

            # Role Specific Score (max 10 points)
            ds_skills = len(analysis.get('ml_frameworks', [])) + len(analysis.get('deep_learning', [])) + \
                       len(analysis.get('nlp_skills', [])) + len(analysis.get('computer_vision', []))
            
            de_skills = len(analysis.get('etl_tools', [])) + len(analysis.get('data_warehousing', [])) + \
                       len(analysis.get('orchestration_tools', [])) + len(analysis.get('streaming_tech', []))
            
            role_specific_score = min(10, max(ds_skills, de_skills))

            # Calculate total score
            total_score = education_score + experience_score + technical_score + \
                         project_score + impact_score + role_specific_score

            return {
                'total_score': total_score,
                'education_score': education_score,
                'experience_score': experience_score,
                'technical_score': technical_score,
                'project_score': project_score,
                'impact_score': impact_score,
                'role_specific_score': role_specific_score
            }
        except Exception as e:
            print(f"Error calculating score: {str(e)}")
            return {
                'total_score': 0,
                'education_score': 0,
                'experience_score': 0,
                'technical_score': 0,
                'project_score': 0,
                'impact_score': 0,
                'role_specific_score': 0
            }

    def get_statistics(self):
        """Get statistics of analyzed resumes"""
        conn = sqlite3.connect(self.db_path)
        df = pd.read_sql_query("SELECT analysis_json FROM resumes", conn)
        conn.close()

        if df.empty:
            return {
                'total_resumes': 0,
                'avg_work_experience': 0,
                'education_levels': {},
                'major_distribution': {},
                'top_programming_languages': {},
                'top_technical_skills': {},
                'top_ml_frameworks': {},
                'top_visualization_tools': {},
                'top_databases': {},
                'top_etl_tools': {},
                'top_streaming_tech': {},
                'top_cloud_platforms': {},
                'top_certifications': {},
                'university_distribution': {}
            }

        analyses = [json.loads(x) for x in df['analysis_json']]
        
        # Calculate statistics
        stats = {
            'total_resumes': len(analyses),
            'avg_work_experience': 0,
            'education_levels': {},
            'major_distribution': {},
            'top_programming_languages': {},
            'top_technical_skills': {},
            'top_ml_frameworks': {},
            'top_visualization_tools': {},
            'top_databases': {},
            'top_etl_tools': {},
            'top_streaming_tech': {},
            'top_cloud_platforms': {},
            'top_certifications': {},
            'university_distribution': {}
        }

        # Calculate averages and distributions
        total_exp = 0
        valid_exp = 0

        for analysis in analyses:
            # Work experience
            exp = analysis.get('years_experience', 0)
            if isinstance(exp, (int, float)) or (isinstance(exp, str) and exp.replace('.', '').isdigit()):
                try:
                    exp = float(exp)
                    total_exp += exp
                    valid_exp += 1
                except:
                    pass

            # Education level
            edu = analysis.get('education_level', 'Not specified')
            stats['education_levels'][edu] = stats['education_levels'].get(edu, 0) + 1

            # Major
            major = analysis.get('major', 'Not specified')
            stats['major_distribution'][major] = stats['major_distribution'].get(major, 0) + 1

            # University
            uni = analysis.get('university', 'Not specified')
            stats['university_distribution'][uni] = stats['university_distribution'].get(uni, 0) + 1

            # Technical skills distributions
            for lang in analysis.get('programming_languages', []):
                stats['top_programming_languages'][lang] = stats['top_programming_languages'].get(lang, 0) + 1

            for skill in analysis.get('technical_skills', []):
                stats['top_technical_skills'][skill] = stats['top_technical_skills'].get(skill, 0) + 1

            for framework in analysis.get('ml_frameworks', []):
                stats['top_ml_frameworks'][framework] = stats['top_ml_frameworks'].get(framework, 0) + 1

            for tool in analysis.get('visualization_tools', []):
                stats['top_visualization_tools'][tool] = stats['top_visualization_tools'].get(tool, 0) + 1

            for db in analysis.get('databases', []):
                stats['top_databases'][db] = stats['top_databases'].get(db, 0) + 1

            for tool in analysis.get('etl_tools', []):
                stats['top_etl_tools'][tool] = stats['top_etl_tools'].get(tool, 0) + 1

            for tech in analysis.get('streaming_tech', []):
                stats['top_streaming_tech'][tech] = stats['top_streaming_tech'].get(tech, 0) + 1

            for platform in analysis.get('cloud_platforms', []):
                stats['top_cloud_platforms'][platform] = stats['top_cloud_platforms'].get(platform, 0) + 1

            for cert in analysis.get('certifications', []):
                stats['top_certifications'][cert] = stats['top_certifications'].get(cert, 0) + 1

        # Calculate average work experience
        stats['avg_work_experience'] = total_exp / valid_exp if valid_exp > 0 else 0

        # Sort and limit distributions
        for key in stats:
            if isinstance(stats[key], dict):
                stats[key] = dict(sorted(stats[key].items(), key=lambda x: x[1], reverse=True)[:10])

        return stats

    def get_candidate_rankings(self, role_type='both', min_score=50):
        """Get ranked list of candidates based on their scores"""
        conn = sqlite3.connect(self.db_path)
        df = pd.read_sql_query("SELECT analysis_json FROM resumes", conn)
        conn.close()

        if df.empty:
            return []

        rankings = []
        for analysis_json in df['analysis_json']:
            analysis = json.loads(analysis_json)
            scores = self.calculate_score(analysis)
            
            if scores['total_score'] >= min_score:
                candidate = {
                    'name': analysis.get('name', 'Not found'),
                    'email': analysis.get('email', 'Not found'),
                    'years_experience': analysis.get('years_experience', 'Not found'),
                    'education_level': analysis.get('education_level', 'Not found'),
                    'key_skills': (
                        analysis.get('programming_languages', []) +
                        analysis.get('technical_skills', [])
                    )[:5],  # Top 5 skills
                    **scores
                }
                
                # Filter based on role type
                if role_type == 'data_science':
                    ds_score = len(analysis.get('ml_frameworks', [])) + \
                              len(analysis.get('deep_learning', [])) + \
                              len(analysis.get('nlp_skills', [])) + \
                              len(analysis.get('computer_vision', []))
                    if ds_score > 0:
                        rankings.append(candidate)
                elif role_type == 'data_engineering':
                    de_score = len(analysis.get('etl_tools', [])) + \
                              len(analysis.get('data_warehousing', [])) + \
                              len(analysis.get('orchestration_tools', [])) + \
                              len(analysis.get('streaming_tech', []))
                    if de_score > 0:
                        rankings.append(candidate)
                else:  # both
                    rankings.append(candidate)

        # Sort by total score
        rankings.sort(key=lambda x: x['total_score'], reverse=True)
        return rankings

    def export_to_csv(self):
        """Export analyses to CSV"""
        conn = sqlite3.connect(self.db_path)
        df = pd.read_sql_query("SELECT * FROM resumes", conn)
        conn.close()
        
        csv_path = f"resume_analyses_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
        df.to_csv(csv_path, index=False)
        return csv_path

    def export_to_json(self):
        """Export analyses to JSON"""
        conn = sqlite3.connect(self.db_path)
        df = pd.read_sql_query("SELECT * FROM resumes", conn)
        conn.close()
        
        json_path = f"resume_analyses_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        df.to_json(json_path, orient='records')
        return json_path