import re

class ResumeAnalyzer:
    def __init__(self):
        """Build the keyword table used to classify an uploaded document.

        ``self.document_types`` maps each document-type label to a list of
        lowercase indicator phrases. The labels are inserted in the order
        resume, marksheet, certificate, id_card.
        """
        resume_terms = [
            'experience', 'education', 'skills', 'work', 'project',
            'objective', 'summary', 'employment', 'qualification',
            'achievements',
        ]
        marksheet_terms = [
            'grade', 'marks', 'score', 'semester', 'cgpa', 'sgpa',
            'examination', 'result', 'academic year', 'percentage',
        ]
        certificate_terms = [
            'certificate', 'certification', 'awarded', 'completed',
            'achievement', 'training', 'course completion', 'qualified',
        ]
        id_card_terms = [
            'id card', 'identity', 'student id', 'employee id',
            'valid until', 'date of issue', 'identification',
        ]

        self.document_types = {
            'resume': resume_terms,
            'marksheet': marksheet_terms,
            'certificate': certificate_terms,
            'id_card': id_card_terms,
        }
        
    def detect_document_type(self, text):
        """Guess which kind of document *text* is.

        Every document type is scored from how many of its indicator phrases
        occur in the lower-cased text (substring test): 70% of the score is
        the share of that type's phrases that were found, 30% is the number
        of found phrases relative to the word count.

        Returns:
            str: the best-scoring label, or ``'unknown'`` when even the best
            score does not exceed 0.15.
        """
        lowered = text.lower()
        # +1 keeps the divisor non-zero for empty input.
        word_total = len(lowered.split()) + 1

        scores = {}
        for label, indicators in self.document_types.items():
            hits = len([term for term in indicators if term in lowered])
            share_of_terms = hits / len(indicators)
            per_word = hits / word_total
            scores[label] = (share_of_terms * 0.7) + (per_word * 0.3)

        # On ties the label that was inserted first wins.
        winner = max(scores, key=scores.get)
        if scores[winner] > 0.15:
            return winner
        return 'unknown'
        
    def calculate_keyword_match(self, resume_text, required_skills):
        """Report which required skills are mentioned in the resume text.

        Matching is a case-insensitive substring test. Runs of whitespace
        (including line breaks) are collapsed to a single space on both sides
        first, so a multi-word skill such as "machine learning" is still found
        when text extraction wrapped it across two lines.

        Args:
            resume_text: Raw resume text; ``None`` is treated as empty.
            required_skills: Iterable of skill strings; ``None`` or empty
                means there is nothing to match.

        Returns:
            dict: ``'score'`` (percentage of required skills found, ``0`` when
            no skills were given), ``'found_skills'`` and ``'missing_skills'``
            (the original skill strings, in input order).
        """
        skills = list(required_skills or [])
        haystack = re.sub(r'\s+', ' ', (resume_text or '').lower())

        found_skills = []
        missing_skills = []
        for skill in skills:
            needle = re.sub(r'\s+', ' ', skill.lower())
            # A single substring test is sufficient: a skill that is absent
            # from the whole text cannot be present in any slice of it.
            if needle in haystack:
                found_skills.append(skill)
            else:
                missing_skills.append(skill)

        match_score = (len(found_skills) / len(skills)) * 100 if skills else 0

        return {
            'score': match_score,
            'found_skills': found_skills,
            'missing_skills': missing_skills
        }
        
    def check_resume_sections(self, text):
        """Score how completely the four essential resume sections appear.

        Each of contact, education, experience and skills is worth up to 25
        points, awarded in proportion to how many of that section's marker
        words occur (substring test) in the lower-cased text.

        Returns:
            The sum of the four section scores (0 to 100).
        """
        lowered = text.lower()
        markers_by_section = (
            ('contact', ('email', 'phone', 'address', 'linkedin')),
            ('education', ('education', 'university', 'college', 'degree', 'academic')),
            ('experience', ('experience', 'work', 'employment', 'job', 'internship')),
            ('skills', ('skills', 'technologies', 'tools', 'proficiencies', 'expertise')),
        )

        points = []
        for _section, markers in markers_by_section:
            present = len([marker for marker in markers if marker in lowered])
            # Capped at 25 per section.
            points.append(min(25, (present / len(markers)) * 25))

        return sum(points)
        
    def check_formatting(self, text):
        """Rate the layout of *text* out of 100 and explain every deduction.

        Checks, in order: minimum length (-30), an all-upper-case header line
        (-20), at least one bullet line (-20), two blank lines in a row (-15)
        and a recognisable e-mail / phone / LinkedIn string (-15).

        Returns:
            tuple: ``(score, deductions)`` where ``score`` is never below 0
            and ``deductions`` lists one message per failed check.
        """
        rows = text.split('\n')
        trimmed = [row.strip() for row in rows]
        problems = []
        penalty = 0

        # Minimum content: fewer than 300 characters.
        if len(text) < 300:
            penalty += 30
            problems.append("Resume is too short")

        # Section headers: some line must be entirely upper-case.
        header_present = False
        for row in rows:
            if row.isupper():
                header_present = True
                break
        if not header_present:
            penalty += 20
            problems.append("No clear section headers found")

        # Bullet points: some line must start with a bullet marker.
        bullet_marks = ('•', '-', '*', '→')
        if not any(row.startswith(bullet_marks) for row in trimmed):
            penalty += 20
            problems.append("No bullet points found for listing details")

        # Spacing: two consecutive blank lines count as inconsistent.
        is_blank = [not row for row in trimmed]
        if any(first and second for first, second in zip(is_blank, is_blank[1:])):
            penalty += 15
            problems.append("Inconsistent spacing between sections")

        # Contact information: e-mail, phone or LinkedIn in a known format.
        contact_found = False
        for pattern in (
            r'\b[\w\.-]+@[\w\.-]+\.\w+\b',  # email
            r'\b\d{3}[-.]?\d{3}[-.]?\d{4}\b',  # phone
            r'linkedin\.com/\w+',  # LinkedIn
        ):
            if re.search(pattern, text):
                contact_found = True
                break
        if not contact_found:
            penalty += 15
            problems.append("Missing or improperly formatted contact information")

        return max(0, 100 - penalty), problems
        
    def extract_text_from_pdf(self, file):
        """Return the text of every page of a PDF, one page per line block.

        Args:
            file: Either a file-like object exposing ``read()`` or the raw
                PDF content as bytes.

        Returns:
            str: The extracted page texts, each followed by a newline.

        Raises:
            Exception: Any failure (missing PyPDF2, unreadable PDF, ...) is
                re-raised as a generic ``Exception`` with a descriptive
                message; the original error is chained as ``__cause__``.
        """
        try:
            import PyPDF2
            import io

            if hasattr(file, 'read'):
                file_content = file.read()
                # Rewind so the caller can reuse the upload. This is only a
                # courtesy: a stream that cannot seek must not make the
                # extraction itself fail.
                try:
                    file.seek(0)
                except (AttributeError, OSError):
                    pass
            else:
                # Already raw bytes.
                file_content = file

            pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_content))

            # `or ''` is defensive: a page that yields no text object must
            # not break the concatenation.
            return ''.join(
                (page.extract_text() or '') + "\n" for page in pdf_reader.pages
            )
        except Exception as e:
            raise Exception(f"Error extracting text from PDF: {str(e)}") from e
            
    def extract_text_from_docx(self, docx_file):
        """Extract text from a DOCX file"""
        try:
            from docx import Document
            doc = Document(docx_file)
            full_text = []
            for paragraph in doc.paragraphs:
                full_text.append(paragraph.text)
            return '\n'.join(full_text)
        except Exception as e:
            raise Exception(f"Error extracting text from DOCX file: {str(e)}")

    def extract_personal_info(self, text):
        """Extract basic contact details from resume text.

        Args:
            text: Raw resume text; ``None`` is treated as empty.

        Returns:
            dict: ``'name'`` (first non-blank line, or ``'Unknown'``),
            ``'email'``, ``'phone'``, ``'linkedin'``, ``'github'`` (first
            regex match each, or ``''``) and ``'portfolio'`` (always ``''``).
        """
        text = text or ''

        # Basic patterns for personal info
        email_pattern = r'[\w\.-]+@[\w\.-]+\.\w+'
        phone_pattern = r'(\+\d{1,3}[-.]?)?\s*\(?\d{3}\)?[-.]?\s*\d{3}[-.]?\s*\d{4}'
        linkedin_pattern = r'linkedin\.com/in/[\w-]+'
        github_pattern = r'github\.com/[\w-]+'

        email = re.search(email_pattern, text)
        phone = re.search(phone_pattern, text)
        linkedin = re.search(linkedin_pattern, text)
        github = re.search(github_pattern, text)

        # Basic assumption: the name is the first line that has any content.
        # Leading blank lines must not turn a named resume into 'Unknown'.
        name = next(
            (line.strip() for line in text.split('\n') if line.strip()),
            '',
        )

        return {
            'name': name if name else 'Unknown',
            'email': email.group(0) if email else '',
            # The phone pattern may start matching on whitespace that
            # precedes the number; strip it from the reported value.
            'phone': phone.group(0).strip() if phone else '',
            'linkedin': linkedin.group(0) if linkedin else '',
            'github': github.group(0) if github else '',
            'portfolio': ''  # Can be enhanced later
        }

    def extract_education(self, text):
        """Collect the education entries found in resume text.

        A line containing any education keyword (case-insensitive substring)
        opens the education section; the line itself is kept unless it is
        exactly one of the keywords. While the section is open, a blank line
        closes the current entry, and a line containing one of the generic
        resume section words closes the section.

        Returns:
            list[str]: One string per entry, its lines joined with spaces.
        """
        education_keywords = (
            'education', 'academic', 'qualification', 'degree', 'university', 'college',
            'school', 'institute', 'certification', 'diploma', 'bachelor', 'master',
            'phd', 'b.tech', 'm.tech', 'b.e', 'm.e', 'b.sc', 'm.sc', 'bca', 'mca', 'b.com',
            'm.com', 'b.cs-it', 'imca', 'bba', 'mba', 'honors', 'scholarship',
        )
        entries = []
        pending = []
        collecting = False

        def flush():
            # Close the entry being built, if it has any lines.
            if pending:
                entries.append(' '.join(pending))
                pending.clear()

        for raw_line in text.split('\n'):
            stripped = raw_line.strip()
            lowered = stripped.lower()

            if any(term in lowered for term in education_keywords):
                # A bare keyword is only a header; anything longer is content.
                if lowered not in education_keywords:
                    pending.append(stripped)
                collecting = True
                continue

            if not collecting:
                continue

            if not stripped:
                flush()
                continue

            # The line holds no education keyword here (handled above), so a
            # generic section word means a different section has started.
            if any(term.lower() in lowered for term in self.document_types['resume']):
                collecting = False
                flush()
                continue

            pending.append(stripped)

        flush()
        return entries

    def extract_experience(self, text):
        """Collect the work-experience entries found in resume text.

        A line containing any experience keyword (case-insensitive substring)
        opens the section; the line itself is kept unless it is exactly one
        of the keywords. While the section is open, a blank line closes the
        current entry, and a line containing one of the generic resume
        section words closes the section.

        Returns:
            list[str]: One string per entry, its lines joined with spaces.
        """
        experience_keywords = (
            'experience', 'employment', 'work history', 'professional experience',
            'work experience', 'career history', 'professional background',
            'employment history', 'job history', 'positions held', 'experience',
            'job title', 'job responsibilities', 'job description', 'job summary',
        )
        entries = []
        pending = []
        collecting = False

        def flush():
            # Close the entry being built, if it has any lines.
            if pending:
                entries.append(' '.join(pending))
                pending.clear()

        for raw_line in text.split('\n'):
            stripped = raw_line.strip()
            lowered = stripped.lower()

            if any(term in lowered for term in experience_keywords):
                # A bare keyword is only a header; anything longer is content.
                if lowered not in experience_keywords:
                    pending.append(stripped)
                collecting = True
                continue

            if not collecting:
                continue

            if not stripped:
                flush()
                continue

            # The line holds no experience keyword here (handled above), so a
            # generic section word means a different section has started.
            if any(term.lower() in lowered for term in self.document_types['resume']):
                collecting = False
                flush()
                continue

            pending.append(stripped)

        flush()
        return entries

    def extract_projects(self, text):
        """Collect the project entries found in resume text.

        A line containing any project keyword (case-insensitive substring)
        opens the section; the line itself is kept unless it is exactly one
        of the keywords. While the section is open, a blank line closes the
        current entry, and a line containing one of the generic resume
        section words closes the section.

        Returns:
            list[str]: One string per entry, its lines joined with spaces.
        """
        project_keywords = (
            'projects', 'personal projects', 'academic projects', 'key projects',
            'major projects', 'professional projects', 'project experience',
            'relevant projects', 'featured projects', 'latest projects',
            'top projects',
        )
        entries = []
        pending = []
        collecting = False

        def flush():
            # Close the entry being built, if it has any lines.
            if pending:
                entries.append(' '.join(pending))
                pending.clear()

        for raw_line in text.split('\n'):
            stripped = raw_line.strip()
            lowered = stripped.lower()

            if any(term in lowered for term in project_keywords):
                # A bare keyword is only a header; anything longer is content.
                if lowered not in project_keywords:
                    pending.append(stripped)
                collecting = True
                continue

            if not collecting:
                continue

            if not stripped:
                flush()
                continue

            # The line holds no project keyword here (handled above), so a
            # generic section word means a different section has started.
            if any(term.lower() in lowered for term in self.document_types['resume']):
                collecting = False
                flush()
                continue

            pending.append(stripped)

        flush()
        return entries

    def extract_skills(self, text):
        """Extract individual skills from the skills section of resume text.

        A line containing any skills keyword (case-insensitive substring)
        opens the section; the line itself is kept unless it is exactly one
        of the keywords. While the section is open, a blank line closes the
        current entry, and a line containing one of the generic resume
        section words closes the section.

        Each entry is tokenised in a single pass on all supported separators
        at once, so a line that mixes separators ("Python, Java | C++") yields
        clean tokens rather than overlapping fragments. An entry that contains
        no separator at all is treated as one skill per line instead of being
        discarded.

        Args:
            text: Raw resume text; ``None`` is treated as empty.

        Returns:
            list[str]: Unique skills in the order they were first seen.
        """
        skills_keywords = [
            'skills', 'technical skills', 'competencies', 'expertise',
            'core competencies', 'professional skills', 'key skills',
            'technical expertise', 'proficiencies', 'qualifications',
            'top skills', 'key skill', 'major skill', 'personal skill',
            'soft skills', 'soft skill', 'soft skillset'
        ]
        # Common skill separators: , • | / \ · > - – ―
        splitter = re.compile(r'[,•|/\\·>\-–―]')

        found = {}  # dict used as an insertion-ordered set
        current_entry = []
        in_skills_section = False

        def harvest():
            # Tokenise the entry collected so far and reset it.
            if not current_entry:
                return
            joined = ' '.join(current_entry)
            if splitter.search(joined):
                pieces = splitter.split(joined)
            else:
                # No separator anywhere: assume one skill per line.
                pieces = list(current_entry)
            for piece in pieces:
                piece = piece.strip()
                if piece:
                    found.setdefault(piece, None)
            current_entry.clear()

        for line in (text or '').split('\n'):
            line = line.strip()
            lowered = line.lower()

            # Check for section header
            if any(keyword in lowered for keyword in skills_keywords):
                if lowered not in skills_keywords:
                    # This line contains skills, not just a header
                    current_entry.append(line)
                in_skills_section = True
                continue

            if not in_skills_section:
                continue

            if not line:
                # Empty line ends the current entry
                harvest()
                continue

            # The line holds no skills keyword here (handled above), so a
            # generic section word means a different section has started.
            if any(keyword.lower() in lowered for keyword in self.document_types['resume']):
                in_skills_section = False
                harvest()
                continue

            current_entry.append(line)

        # Process any remaining skills
        harvest()

        return list(found)

    def extract_summary(self, text):
        """Extract the summary/objective paragraph from resume text.

        An explicitly marked section takes priority: a line containing any
        summary keyword (case-insensitive substring) opens it, a blank line
        closes the current paragraph, and a line containing one of the
        generic resume section words closes the section.

        Only when no such section yields any text does the method fall back
        to the first five non-blank lines, and then only if they total more
        than ten words and mention none of the contact words (email, phone,
        address, tel, mobile, linkedin). Using the fallback strictly as a
        fallback keeps the same text from being returned twice when the
        summary header sits near the top of the document.

        Args:
            text: Raw resume text; ``None`` is treated as empty.

        Returns:
            str: The summary text, or ``''`` when none was found.
        """
        summary_keywords = [
            'summary', 'professional summary', 'career summary', 'objective',
            'career objective', 'professional objective', 'about me', 'profile',
            'professional profile', 'career profile', 'overview', 'skill summary'
        ]
        lines = (text or '').split('\n')

        # Pass 1: explicitly marked summary section(s).
        sections = []
        current_entry = []
        in_summary_section = False

        def flush():
            # Close the paragraph being built, if it has any lines.
            if current_entry:
                sections.append(' '.join(current_entry))
                current_entry.clear()

        for line in lines:
            line = line.strip()
            lowered = line.lower()

            if any(keyword in lowered for keyword in summary_keywords):
                if lowered not in summary_keywords:
                    # This line contains summary info, not just a header
                    current_entry.append(line)
                in_summary_section = True
                continue

            if not in_summary_section:
                continue

            if not line:
                flush()
                continue

            # The line holds no summary keyword here (handled above), so a
            # generic section word means a different section has started.
            if any(keyword.lower() in lowered for keyword in self.document_types['resume']):
                in_summary_section = False
                flush()
                continue

            current_entry.append(line)

        flush()
        if sections:
            return ' '.join(sections)

        # Pass 2: fall back to the opening lines of the document.
        first_lines = [line.strip() for line in lines if line.strip()][:5]
        if first_lines and not any(keyword in first_lines[0].lower() for keyword in summary_keywords):
            potential_summary = ' '.join(first_lines)
            if len(potential_summary.split()) > 10:  # More than 10 words
                if not re.search(r'\b(?:email|phone|address|tel|mobile|linkedin)\b', potential_summary.lower()):
                    return potential_summary

        return ''

    def analyze_resume(self, resume_data, job_requirements):
        """Analyze a resume and return scores and recommendations.

        Args:
            resume_data: Mapping with the resume text under ``'raw_text'``.
                ``None`` (for the mapping or for the text) is treated as an
                empty resume.
            job_requirements: Mapping that may carry ``'required_skills'``
                (list of skill strings) and ``'require_gpa'`` (bool).
                ``None`` is treated as "no requirements".

        Returns:
            dict: For a document recognised as a resume, the extracted
            personal info and sections, the per-section scores and
            suggestions, and ``'ats_score'``: a weighted sum of contact (10%),
            summary (10%), skills match (30%), experience (20%), education
            (10%) and formatting (20%). For any other document type a reduced
            result with ``'ats_score'`` 0 and a single explanatory
            suggestion. If anything raises, a reduced result that
            additionally carries an ``'error'`` message (the traceback is
            printed).
        """
        try:
            resume_data = resume_data or {}
            job_requirements = job_requirements or {}
            text = resume_data.get('raw_text') or ''

            # Extract personal information
            personal_info = self.extract_personal_info(text)

            # First detect document type
            doc_type = self.detect_document_type(text)
            if doc_type != 'resume':
                return {
                    'ats_score': 0,
                    'document_type': doc_type,
                    'keyword_match': {'score': 0, 'found_skills': [], 'missing_skills': []},
                    'section_score': 0,
                    'format_score': 0,
                    'suggestions': [f"This appears to be a {doc_type} document. Please upload a resume for ATS analysis."]
                }

            # Calculate keyword match
            required_skills = job_requirements.get('required_skills') or []
            keyword_match = self.calculate_keyword_match(text, required_skills)

            # Extract all resume sections
            education = self.extract_education(text)
            experience = self.extract_experience(text)
            projects = self.extract_projects(text)
            skills = list(self.extract_skills(text))
            summary = self.extract_summary(text)

            # Check resume sections
            section_score = self.check_resume_sections(text)

            # Check formatting
            format_score, format_deductions = self.check_formatting(text)

            # Generate section-specific suggestions
            contact_suggestions = []
            if not personal_info.get('email'):
                contact_suggestions.append("Add your email address")
            if not personal_info.get('phone'):
                contact_suggestions.append("Add your phone number")
            if not personal_info.get('linkedin'):
                contact_suggestions.append("Add your LinkedIn profile URL")

            summary_suggestions = []
            summary_words = len(summary.split())
            if not summary:
                summary_suggestions.append("Add a professional summary to highlight your key qualifications")
            elif summary_words < 30:
                summary_suggestions.append("Expand your professional summary to better highlight your experience and goals")
            elif summary_words > 100:
                summary_suggestions.append("Consider making your summary more concise (aim for 50-75 words)")

            skills_suggestions = []
            if not skills:
                skills_suggestions.append("Add a dedicated skills section")
            elif len(skills) < 5:
                # Only meaningful once a skills section exists; otherwise the
                # "add a section" advice above already covers it.
                skills_suggestions.append("List more relevant technical and soft skills")
            if keyword_match['score'] < 70:
                skills_suggestions.append("Add more skills that match the job requirements")

            experience_suggestions = []
            if not experience:
                experience_suggestions.append("Add your work experience section")
            else:
                has_dates = any(re.search(r'\b(19|20)\d{2}\b', exp) for exp in experience)
                has_bullets = any(re.search(r'[•\-\*]', exp) for exp in experience)
                has_action_verbs = any(
                    re.search(r'\b(developed|managed|created|implemented|designed|led|improved)\b', exp.lower())
                    for exp in experience
                )

                if not has_dates:
                    experience_suggestions.append("Include dates for each work experience")
                if not has_bullets:
                    experience_suggestions.append("Use bullet points to list your achievements and responsibilities")
                if not has_action_verbs:
                    experience_suggestions.append("Start bullet points with strong action verbs")

            education_suggestions = []
            if not education:
                education_suggestions.append("Add your educational background")
            else:
                has_dates = any(re.search(r'\b(19|20)\d{2}\b', edu) for edu in education)
                # `s?` lets the plural spellings "Bachelors"/"Masters" count
                # as a degree; without it the trailing \b rejects them.
                has_degree = any(
                    re.search(r'\b(bachelors?|masters?|phd|b\.|m\.|diploma)\b', edu.lower())
                    for edu in education
                )
                has_gpa = any(
                    re.search(r'\b(gpa|cgpa|grade|percentage)\b', edu.lower())
                    for edu in education
                )

                if not has_dates:
                    education_suggestions.append("Include graduation dates")
                if not has_degree:
                    education_suggestions.append("Specify your degree type")
                if not has_gpa and job_requirements.get('require_gpa', False):
                    education_suggestions.append("Include your GPA if it's above 3.0")

            format_suggestions = []
            if format_score < 100:
                format_suggestions.extend(format_deductions)

            # Calculate section-specific scores
            contact_score = 100 - (len(contact_suggestions) * 25)  # -25 for each missing item
            summary_score = 100 - (len(summary_suggestions) * 33)  # -33 for each issue
            skills_score = keyword_match['score']
            experience_score = 100 - (len(experience_suggestions) * 25)
            education_score = 100 - (len(education_suggestions) * 25)

            # Calculate overall ATS score with weighted components
            ats_score = (
                int(round(contact_score * 0.1)) +      # 10% weight for contact info
                int(round(summary_score * 0.1)) +      # 10% weight for summary
                int(round(skills_score * 0.3)) +       # 30% weight for skills match
                int(round(experience_score * 0.2)) +   # 20% weight for experience
                int(round(education_score * 0.1)) +    # 10% weight for education
                int(round(format_score * 0.2))         # 20% weight for formatting
            )

            # Combine all suggestions into a single list
            suggestions = [
                *contact_suggestions,
                *summary_suggestions,
                *skills_suggestions,
                *experience_suggestions,
                *education_suggestions,
                *format_suggestions,
            ]

            if not suggestions:
                suggestions.append("Your resume is well-optimized for ATS systems")

            # Return final structured result
            return {
                **personal_info,  # Include extracted personal info
                'ats_score': ats_score,
                'document_type': 'resume',
                'keyword_match': keyword_match,
                'section_score': section_score,
                'format_score': format_score,
                'education': education,
                'experience': experience,
                'projects': projects,
                'skills': skills,
                'summary': summary,
                'suggestions': suggestions,
                'contact_suggestions': contact_suggestions,
                'summary_suggestions': summary_suggestions,
                'skills_suggestions': skills_suggestions,
                'experience_suggestions': experience_suggestions,
                'education_suggestions': education_suggestions,
                'format_suggestions': format_suggestions,
                'section_scores': {
                    'contact': contact_score,
                    'summary': summary_score,
                    'skills': skills_score,
                    'experience': experience_score,
                    'education': education_score,
                    'format': format_score
                }
            }
        except Exception as e:
            # Top-level boundary: report the failure to the caller as data
            # rather than letting it propagate.
            import traceback
            print(f"Error analyzing resume: {str(e)}")
            print(traceback.format_exc())
            # Return a default error response
            return {
                'error': f"Resume analysis failed: {str(e)}",
                'ats_score': 0,
                'document_type': 'unknown',
                'keyword_match': {'score': 0, 'found_skills': [], 'missing_skills': []},
                'section_score': 0,
                'format_score': 0,
                'suggestions': [f"Error analyzing resume: {str(e)}. Please check your file and try again."]
            }