import streamlit as st
import os
from dotenv import load_dotenv
from langchain_aws import BedrockEmbeddings, BedrockLLM
import boto3
from langchain_core.prompts import PromptTemplate
import docx
import zipfile
import PyPDF2
import io
from typing import List, Dict
import pandas as pd
from io import BytesIO
from pathlib import Path

def extract_text_from_file(file_content: bytes, file_extension: str) -> str:
    """Extract plain text from the raw bytes of a resume file.

    Args:
        file_content: Raw bytes of the file.
        file_extension: Extension including the leading dot (e.g. ``'.pdf'``).
            Matched case-insensitively.

    Returns:
        The extracted text. Unsupported extensions yield an empty string.
        If parsing fails, the error is printed and whatever text had been
        collected up to that point is returned (possibly empty).
    """
    extension = (file_extension or "").lower()
    parts = []
    try:
        if extension == '.pdf':
            pdf_reader = PyPDF2.PdfReader(BytesIO(file_content))
            for page in pdf_reader.pages:
                # Guard against a page that yields no text (None or empty),
                # which would otherwise abort extraction of the remaining pages.
                parts.append((page.extract_text() or "") + "\n")
        elif extension in ('.docx', '.doc'):
            # NOTE(review): legacy binary .doc files are routed through the
            # same docx parser; presumably they fail here and end up in the
            # error handler below - confirm whether .doc should be accepted.
            doc = docx.Document(BytesIO(file_content))
            for paragraph in doc.paragraphs:
                parts.append(paragraph.text + "\n")
        elif extension == '.txt':
            # Replace undecodable bytes instead of discarding the whole file
            # when it is not valid UTF-8.
            parts.append(file_content.decode('utf-8', errors='replace'))
    except Exception as e:
        print(f"Error extracting text from {file_extension} file: {str(e)}")
    return "".join(parts)

def process_zip_file(zip_content: bytes, blacklist: set) -> List[dict]:
    """Extract text from every supported document inside a ZIP archive.

    Args:
        zip_content: Raw bytes of the ZIP archive.
        blacklist: Company names passed to check_for_blacklisted_companies.

    Returns:
        One dict per accepted entry with the keys ``id``, ``name`` (the entry
        name as stored in the archive, which may include directories) and
        ``content`` (the extracted text). Entries with an unsupported
        extension, no extractable text, or a blacklisted company are skipped.

    Raises:
        zipfile.BadZipFile: If ``zip_content`` is not a valid ZIP archive.
    """
    supported_extensions = {'.txt', '.docx', '.doc', '.pdf'}
    processed_files = []

    with zipfile.ZipFile(BytesIO(zip_content)) as z:
        for zip_filename in z.namelist():
            # Compare the lower-cased suffix so "RESUME.PDF" is not ignored.
            file_extension = Path(zip_filename).suffix.lower()
            if file_extension not in supported_extensions:
                continue

            try:
                with z.open(zip_filename) as f:
                    file_content = f.read()
                text = extract_text_from_file(file_content, file_extension)

                if not text:
                    print(f"Skipping {zip_filename} from ZIP - no text could be extracted")
                elif check_for_blacklisted_companies(text, blacklist):
                    print(f"Skipping {zip_filename} from ZIP - contains blacklisted company")
                else:
                    processed_files.append({
                        # NOTE: built-in str hashing is randomized per
                        # interpreter process unless PYTHONHASHSEED is set,
                        # so this id is not stable across runs.
                        "id": f"{zip_filename}_{hash(text)}",
                        "name": zip_filename,
                        "content": text
                    })
            except Exception as e:
                # One unreadable entry must not abort the rest of the archive.
                print(f"Error processing {zip_filename} from ZIP: {str(e)}")

    return processed_files


def load_blacklist() -> set:
    """Read blacklisted company names from blacklist.txt.

    Each non-blank line is stripped and lower-cased. If the file does not
    exist, a warning is printed, an empty file is created in its place and
    an empty set is returned.
    """
    blacklist_path = 'blacklist.txt'
    companies = set()
    try:
        with open(blacklist_path, 'r', encoding='utf-8') as handle:
            for raw_line in handle:
                entry = raw_line.strip()
                if entry:
                    companies.add(entry.lower())
    except FileNotFoundError:
        print("Warning: blacklist.txt not found. Creating empty blacklist.")
        # Opening in write mode is enough to create the empty file.
        with open(blacklist_path, 'w', encoding='utf-8'):
            pass
        return set()
    return companies

def check_for_blacklisted_companies(text: str, blacklist: set) -> bool:
    """
    Check if any blacklisted company names appear in the text.

    Matching is a case-insensitive substring search, so a short entry can
    also match inside a longer word.

    Args:
        text: The text to check
        blacklist: Set of blacklisted company names (any letter case)
    Returns:
        True if blacklisted company found, False otherwise
    """
    if not text or not blacklist:
        return False

    text_lower = text.lower()
    for company in blacklist:
        # Normalize the entry as well: the text is lower-cased, so an entry
        # containing capitals could otherwise never match.
        needle = company.strip().lower()
        # An empty needle is a substring of every string and would flag
        # every resume, so blank entries are ignored.
        if needle and needle in text_lower:
            return True
    return False


def _unique_destination(folder: Path, filename: str) -> Path:
    """Return a path inside folder for filename that does not exist yet."""
    stem = Path(filename).stem
    suffix = Path(filename).suffix
    candidate = folder / filename
    counter = 1
    while candidate.exists():
        # Always derive from the original stem so names do not compound
        # into "name_1_2".
        candidate = folder / f"{stem}_{counter}{suffix}"
        counter += 1
    return candidate


def save_uploaded_resumes(uploaded_files):
    """Save uploaded resume files to the Docs folder.

    Individual files are stored unchanged. For ZIP uploads, the text
    extracted from each supported entry is stored as a ``.txt`` file.
    Files with no extractable text or with a blacklisted company are
    skipped. Errors for one upload are printed and do not stop the others.

    Args:
        uploaded_files: Iterable of uploaded file objects exposing ``name``,
            ``seek`` and ``read``.

    Returns:
        List of the file names that were written to the Docs folder.
    """
    # NOTE(review): this path is relative to the current working directory,
    # while main() reads resumes from the Docs folder next to this file; the
    # two only coincide when the app is launched from the script's directory.
    docs_folder = Path("Docs")
    docs_folder.mkdir(exist_ok=True)
    blacklist = load_blacklist()

    saved_files = []
    for uploaded_file in uploaded_files:
        try:
            # Rewind first so a stream that was already consumed is read in full.
            uploaded_file.seek(0)
            content = uploaded_file.read()
            file_extension = Path(uploaded_file.name).suffix.lower()

            if file_extension == '.zip':
                for processed_file in process_zip_file(content, blacklist):
                    # Entry names may contain directories or "..": keep only
                    # the final component so nothing is written outside Docs.
                    entry_name = Path(processed_file["name"]).name
                    # The payload is extracted text, not the original binary,
                    # so store it as .txt; keeping ".pdf"/".docx" would make
                    # the file unparseable when the folder is scanned later.
                    text_name = Path(entry_name).with_suffix('.txt').name
                    file_path = _unique_destination(docs_folder, text_name)
                    file_path.write_bytes(processed_file["content"].encode('utf-8'))
                    saved_files.append(file_path.name)
                continue

            # Individual file: validate via its text, then store the original bytes.
            text = extract_text_from_file(content, file_extension)
            if not text:
                print(f"Skipping {uploaded_file.name} - no text could be extracted")
            elif check_for_blacklisted_companies(text, blacklist):
                print(f"Skipping {uploaded_file.name} - contains blacklisted company")
            else:
                file_path = _unique_destination(docs_folder, Path(uploaded_file.name).name)
                file_path.write_bytes(content)
                saved_files.append(file_path.name)

        except Exception as e:
            print(f"Error processing {uploaded_file.name}: {str(e)}")

    return saved_files

def upload_section():
    """Render the resume uploader and, on request, save the uploaded files."""
    st.subheader("Upload Resumes")
    accepted_types = ['pdf', 'docx', 'doc', 'txt', 'zip']
    uploaded_files = st.file_uploader(
        "Upload one or more resumes",
        type=accepted_types,
        accept_multiple_files=True,
    )

    # Nothing to do until files are selected and the button is pressed.
    if not uploaded_files:
        return
    if not st.button("Process Uploaded Resumes"):
        return

    saved_files = save_uploaded_resumes(uploaded_files)
    if not saved_files:
        st.warning("No files were saved. They may contain blacklisted content")
        return

    st.success(f"Successfully saved {len(saved_files)} files to Docs folder")
    st.write("Saved files:", ", ".join(saved_files))
    zip_was_uploaded = any(item.name.endswith('.zip') for item in uploaded_files)
    if zip_was_uploaded:
        st.info("ZIP files were processed and their contents were extracted")


def create_aws_client():
    """Build a boto3 'bedrock-runtime' client from environment settings.

    Reads the ACCESS_KEY, SECRET_ACCESS_KEY and REGION environment variables
    and passes them to boto3 as the credentials and region.
    """
    connection_settings = {
        'region_name': os.getenv('REGION'),
        'aws_access_key_id': os.getenv('ACCESS_KEY'),
        'aws_secret_access_key': os.getenv('SECRET_ACCESS_KEY'),
    }
    return boto3.client('bedrock-runtime', **connection_settings)

def process_docs_folder(folder_path: str) -> List[dict]:
    """Extract text from every document in a folder.

    Regular files are processed in sorted name order. ZIP archives are
    expanded through process_zip_file; other files go through
    extract_text_from_file. Files with no extractable text or with a
    blacklisted company are skipped. A failure on one file is printed and
    does not stop the others.

    Args:
        folder_path: Directory containing the resume files.

    Returns:
        List of dicts with the keys ``id``, ``name`` and ``content``.

    Raises:
        Exception: If the folder does not exist or cannot be listed. The
            message starts with "Error accessing docs folder: ".
    """
    # Validate the folder first so a bad path fails fast, before the
    # blacklist is loaded (which may create blacklist.txt as a side effect).
    if not os.path.exists(folder_path):
        raise Exception(f"Error accessing docs folder: Folder not found: {folder_path}")

    try:
        # Sorted so the processing order is deterministic.
        filenames = sorted(os.listdir(folder_path))
    except Exception as e:
        raise Exception(f"Error accessing docs folder: {str(e)}") from e

    blacklist = load_blacklist()
    processed_files = []

    for filename in filenames:
        file_path = os.path.join(folder_path, filename)
        if not os.path.isfile(file_path):
            continue

        file_extension = Path(filename).suffix.lower()

        try:
            with open(file_path, 'rb') as file:
                content = file.read()

            if file_extension == '.zip':
                processed_files.extend(process_zip_file(content, blacklist))
                continue

            text = extract_text_from_file(content, file_extension)
            if not text:
                print(f"Skipping {filename} - no text could be extracted")
            elif check_for_blacklisted_companies(text, blacklist):
                print(f"Skipping {filename} - contains blacklisted company")
            else:
                processed_files.append({
                    "id": f"{filename}_{hash(text)}",
                    "name": filename,
                    "content": text
                })

        except Exception as e:
            print(f"Error processing {filename}: {str(e)}")

    return processed_files

def check_resume_relevance(job_desc: str, resume_content: str, required_skills: List[str], client) -> dict:
    """Check which required skills a resume covers.

    Each skill is first looked up in the resume with a case-insensitive
    substring search. If at least one is found, the score is based on those
    direct hits and the LLM only supplies the explanatory text. If none is
    found, the LLM is asked for related skills and the required skills it
    mentions in its answer count as found.

    Args:
        job_desc: Additional requirements text (not used by this function).
        resume_content: Plain text of the resume.
        required_skills: Skills to look for.
        client: Bedrock runtime client handed to BedrockLLM.

    Returns:
        Dict with ``is_relevant`` (bool), ``score`` (percentage of required
        skills found), ``found_skills`` (list), ``total_skills`` (int) and
        ``key_matches`` (LLM answer or a fallback description).
    """
    total_skills = len(required_skills)
    if total_skills == 0:
        # Nothing to match against: skip building the client and the LLM call.
        return {
            "is_relevant": False,
            "score": 0,
            "found_skills": [],
            "total_skills": 0,
            "key_matches": "No required skills provided"
        }

    llm = BedrockLLM(
        model_id="amazon.titan-text-lite-v1", 
        client=client
    )
    skills_bullets = "\n".join([f"- {skill}" for skill in required_skills])

    # First, do a direct text search for skills
    resume_lower = resume_content.lower()
    found_skills_direct = [skill for skill in required_skills if skill.lower() in resume_lower]

    # If we found any skills directly, proceed with detailed analysis
    if found_skills_direct:
        relevance_prompt = PromptTemplate.from_template("""
            Analyze this resume for the following skills. Be lenient in matching skills.
            
            Required Skills to Check:
            {skills}
            
            Resume Content:
            {resume}
            
            For each skill, determine:
            1. If it's present (including variations and related technologies)
            2. The experience level with the skill
            3. How recently it was used
            
            Respond in this format:
            {{
                "skills_found": [
                    List of skills found (including variations)
                ],
                "match_percentage": Percentage of required skills found (0-100),
                "skill_details": {{
                    "skill_name": {{
                        "found": true/false,
                        "experience": "description of experience",
                        "evidence": "where found in resume"
                    }}
                }}
            }}
            
            Be generous in skill matching. If you find related technologies or variations, count them as matches.
        """)

        message = relevance_prompt.format(skills=skills_bullets, resume=resume_content)

        # The score depends only on the direct hits, so it is the same
        # whether or not the LLM call succeeds.
        match_score = (len(found_skills_direct) / total_skills) * 100

        try:
            response = llm.invoke(message)
        except Exception as e:
            print(f"Error in LLM analysis: {e}")
            # Fall back to direct matching results
            return {
                "is_relevant": len(found_skills_direct) > 0,  # Consider relevant if any skills found
                "score": match_score,
                "found_skills": found_skills_direct,
                "total_skills": total_skills,
                "key_matches": f"Skills found through direct matching: {', '.join(found_skills_direct)}"
            }

        return {
            "is_relevant": match_score >= 50,  # Lower threshold for relevance
            "score": match_score,
            "found_skills": found_skills_direct,
            "total_skills": total_skills,
            "key_matches": response
        }
    else:
        # If no direct matches, do a more lenient check with LLM
        lenient_prompt = PromptTemplate.from_template("""
            Analyze this resume for skills related to or equivalent to:
            {skills}
            
            Consider variations and related technologies.
            
            Resume Content:
            {resume}
            
            List any matches found, including:
            1. Direct matches
            2. Related technologies
            3. Equivalent skills
            
            Respond with found matches only.
        """)

        message = lenient_prompt.format(skills=skills_bullets, resume=resume_content)

        try:
            response = llm.invoke(message)
            # Check if any skills are mentioned in the response
            response_lower = response.lower()
            found_skills = [skill for skill in required_skills if skill.lower() in response_lower]

            match_score = (len(found_skills) / total_skills) * 100
            return {
                "is_relevant": len(found_skills) > 0,  # Consider relevant if any skills found
                "score": match_score,
                "found_skills": found_skills,
                "total_skills": total_skills,
                "key_matches": response
            }
        except Exception as e:
            print(f"Error in lenient LLM analysis: {e}")
            return {
                "is_relevant": False,
                "score": 0,
                "found_skills": [],
                "total_skills": total_skills,
                "key_matches": "Error in analysis"
            }

def get_summary_from_llm(job_desc: str, resume_content: str, required_skills: List[str], client) -> str:
    """Ask the LLM for a structured write-up of a resume against the requirements.

    Returns the model's answer, or a string starting with
    "Error generating analysis: " if the model call raises.
    """
    titan = BedrockLLM(
        model_id="amazon.titan-text-lite-v1", 
        client=client
    )

    analysis_template = PromptTemplate.from_template("""
        Provide a detailed analysis of this resume against the job requirements.
        
        Required Skills:
        {skills}
        
        Additional Requirements:
        {job_desc}
        
        Resume Content:
        {resume_details}
        
        Provide analysis in this format:

        ## Skills Analysis
        ### Required Skills Match
        {skills_analysis}
        
        ### Technical Proficiency
        - For each required skill:
          * Experience level
          * Years of usage
          * Recent projects
        
        ### Additional Technical Skills
        - Only list relevant additional skills

        ## Experience Analysis
        - Total years of relevant experience
        - Key projects using multiple required skills
        - Notable achievements with required technologies

        ## Overall Assessment
        - Skills Match Score: X/Y required skills found
        - Technical Proficiency Score: (0-100)
        - Experience Level Match: (Junior/Mid/Senior)

        ## Recommendation
        - Hiring Decision: (Strong Match/Potential Match/Not Recommended)
        - Key Strengths: (list top 3)
        - Areas to Verify: (list specific areas)
        
        Focus only on exact matches and verifiable experience.
    """)

    # One bullet per skill for the requirements list, and one checklist
    # line per skill for the "Required Skills Match" section.
    skill_bullets = []
    checklist_lines = []
    for skill in required_skills:
        skill_bullets.append(f"- {skill}")
        checklist_lines.append(f"- {skill}: Found/Not Found, Experience Level, Evidence")

    prompt_text = analysis_template.format(
        skills="\n".join(skill_bullets),
        job_desc=job_desc,
        resume_details=resume_content,
        skills_analysis="\n".join(checklist_lines),
    )

    try:
        return titan.invoke(prompt_text)
    except Exception as e:
        return f"Error generating analysis: {str(e)}"
    
def _excel_column_letter(index: int) -> str:
    """Convert a zero-based column index to spreadsheet letters (0 -> 'A', 26 -> 'AA')."""
    letters = ""
    number = index + 1
    while number > 0:
        number, remainder = divmod(number - 1, 26)
        letters = chr(65 + remainder) + letters
    return letters


def export_to_excel(matches: List[dict], required_skills: List[str]) -> BytesIO:
    """Create an Excel report from the matches.

    The sheet "Resume Matches" has one row per match with summary columns
    plus one check/cross column per required skill.

    Args:
        matches: Match dicts; ``name`` is required, ``match_score`` and
            ``found_skills`` are optional.
        required_skills: Skills used for the per-skill columns.

    Returns:
        In-memory .xlsx file, positioned at the start of the buffer.
    """
    excel_data = []

    for match in matches:
        found_skills = match.get('found_skills', [])
        found_lookup = set(found_skills)

        row_data = {
            'Candidate Name': match['name'],
            'Match Score': f"{match.get('match_score', 0):.1f}%",
            'Skills Found': ', '.join(found_skills),
            'Missing Skills': ', '.join([skill for skill in required_skills if skill not in found_lookup]),
            'Total Skills Found': len(found_skills),
            'Total Required Skills': len(required_skills)
        }

        # Add individual skill columns
        for skill in required_skills:
            row_data[f'Skill - {skill}'] = '✓' if skill in found_lookup else '✗'

        excel_data.append(row_data)

    df = pd.DataFrame(excel_data)

    output = BytesIO()
    with pd.ExcelWriter(output, engine='openpyxl') as writer:
        df.to_excel(writer, index=False, sheet_name='Resume Matches')
        worksheet = writer.sheets['Resume Matches']

        # Size each column to its longest cell or header, capped at 50.
        for idx, col in enumerate(df.columns):
            max_length = max(
                df[col].astype(str).apply(len).max(),
                len(col)
            )
            # A plain chr(65 + idx) is only valid for the first 26 columns;
            # 6 fixed columns plus one per skill can exceed that.
            column_letter = _excel_column_letter(idx)
            worksheet.column_dimensions[column_letter].width = min(int(max_length) + 2, 50)

    # Rewind so the buffer can be read directly as well as via getvalue().
    output.seek(0)
    return output

def _collect_matches(processed_files, additional_reqs, required_skills, client, progress_bar):
    """Score each resume and return those with at least one found skill, best score first."""
    matches = []
    total_files = len(processed_files)

    for idx, file_data in enumerate(processed_files):
        progress_bar.progress((idx + 1) / total_files)

        relevance = check_resume_relevance(
            additional_reqs,
            file_data['content'],
            required_skills,
            client
        )

        if relevance['found_skills']:  # Show if any skills found
            matches.append({
                **file_data,
                "match_score": relevance['score'],
                "found_skills": relevance['found_skills'],
                "total_skills": relevance['total_skills'],
                "key_matches": relevance['key_matches']
            })

    matches.sort(key=lambda x: x['match_score'], reverse=True)
    return matches


def _render_match(position, match, additional_reqs, required_skills, client):
    """Render one candidate: score, per-skill checklist and the detailed LLM analysis."""
    with st.container():
        st.markdown("---")
        col1, col2 = st.columns([1, 3])

        with col1:
            st.subheader(f"Match #{position + 1}")
            st.write(f"📄 {match['name']}")
            st.write(f"Match Score: {match['match_score']:.1f}%")

            # Display skills breakdown
            st.write("Skills Found:")
            found_skills = match.get('found_skills', [])
            for skill in required_skills:
                if skill in found_skills:
                    st.write(f"✅ {skill}")
                else:
                    st.write(f"❌ {skill}")

        with col2:
            # The body of this with-block always executes, so one LLM call
            # is made per displayed match.
            with st.expander("Show Detailed Analysis"):
                analysis = get_summary_from_llm(
                    additional_reqs,
                    match['content'],
                    required_skills,
                    client
                )
                st.markdown(analysis)


def _render_results_summary(filtered_matches, required_skills):
    """Render the Excel download, summary metrics and skill chart for a non-empty match list."""
    # Add export section
    st.markdown("---")
    st.subheader("Export Results")

    excel_output = export_to_excel(filtered_matches, required_skills)

    st.download_button(
        label=f"📥 Download Excel Report ({len(filtered_matches)} matches)",
        data=excel_output.getvalue(),
        file_name="resume_matches.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    )

    # Display summary statistics
    st.markdown("---")
    st.subheader("Summary Statistics")
    col1, col2, col3, col4 = st.columns(4)
    match_count = len(filtered_matches)  # caller guarantees this is > 0

    with col1:
        st.metric("Total Matches", match_count)
    with col2:
        avg_score = sum(match['match_score'] for match in filtered_matches) / match_count
        st.metric("Average Match Score", f"{avg_score:.1f}%")
    with col3:
        perfect_matches = sum(1 for match in filtered_matches if match['match_score'] == 100)
        st.metric("Perfect Matches", perfect_matches)
    with col4:
        avg_skills = sum(len(match['found_skills']) for match in filtered_matches) / match_count
        st.metric("Avg. Skills Found", f"{avg_skills:.1f}")

    # Add skill distribution chart
    st.subheader("Skill Distribution")
    skill_counts = {skill: 0 for skill in required_skills}
    for match in filtered_matches:
        for skill in match['found_skills']:
            if skill in skill_counts:
                skill_counts[skill] += 1

    chart_data = pd.DataFrame({
        'Skill': list(skill_counts.keys()),
        'Count': list(skill_counts.values())
    })
    st.bar_chart(chart_data.set_index('Skill'))


def main():
    """Run the resume-screening page.

    Renders the upload section and the skill inputs. When the
    "Find Matching Profiles" button is pressed, every resume in the Docs
    folder next to this file is scored against the required skills, and the
    matches, an Excel export and summary statistics are shown. Errors are
    reported on the page and printed; none propagate to the caller.
    """
    try:
        # Load environment variables and setup
        load_dotenv()
        client = create_aws_client()

        # Streamlit UI setup
        st.set_page_config(
            page_title="Resume Screening Assistant",
            layout="wide"
        )

        st.title("Resume Screening AI Assistant")
        st.subheader("Match resumes with required skills and experience")
        upload_section()

        # Skills input
        st.write("Enter required skills (one per line):")
        skills_input = st.text_area(
            "Required Skills",
            placeholder="Example:\nPython\nJava\nAWS\nDocker",
            height=150
        )

        # Additional requirements
        additional_reqs = st.text_area(
            "Additional Requirements (optional)",
            placeholder="Enter any additional requirements like:\n- Years of experience\n- Education\n- Specific domain knowledge",
            height=100
        )

        # Process inputs
        required_skills = [skill.strip() for skill in skills_input.split('\n') if skill.strip()]

        analyze_button = st.button("Find Matching Profiles", use_container_width=True)
        if not analyze_button:
            return

        if not required_skills:
            st.error("Please enter at least one required skill!")
            return

        docs_folder = os.path.join(os.path.dirname(__file__), 'Docs')

        with st.spinner("Analyzing resumes..."):
            try:
                processed_files = process_docs_folder(docs_folder)

                if not processed_files:
                    st.error("No resumes found in the Docs folder!")
                    return

                # Analyze each resume
                progress_bar = st.progress(0)
                matches = _collect_matches(
                    processed_files, additional_reqs, required_skills, client, progress_bar
                )
                progress_bar.empty()

                if not matches:
                    st.warning(
                        "No profiles found matching the required skills. "
                        "Try adjusting the requirements or adding more resumes."
                    )
                    return

                st.success(f"Found {len(matches)} profiles with matching skills")

                # NOTE(review): these sliders only exist on the run where the
                # button above returned True. Streamlit reruns the script on
                # each widget interaction, so moving a slider presumably
                # hides the results until the button is pressed again.
                # Consider keeping the matches in st.session_state.
                col1, col2 = st.columns(2)
                with col1:
                    min_score = st.slider(
                        "Minimum Match Score",
                        min_value=0,
                        max_value=100,
                        value=50,
                        step=5
                    )
                with col2:
                    min_skills = st.slider(
                        "Minimum Required Skills",
                        min_value=0,
                        max_value=len(required_skills),
                        value=1,
                        step=1
                    )

                # Filter matches based on criteria
                filtered_matches = [
                    match for match in matches
                    if match['match_score'] >= min_score and
                    len(match['found_skills']) >= min_skills
                ]

                st.subheader(f"Showing {len(filtered_matches)} matches meeting criteria")

                if not filtered_matches:
                    # The averages below divide by the number of filtered
                    # matches, so stop here instead of failing with a
                    # division by zero when the filters exclude everything.
                    st.info(
                        "No matches meet the current filter criteria. "
                        "Try lowering the minimum score or skill count."
                    )
                    return

                for idx, match in enumerate(filtered_matches):
                    _render_match(idx, match, additional_reqs, required_skills, client)

                _render_results_summary(filtered_matches, required_skills)

            except Exception as e:
                st.error(f"Error during analysis: {str(e)}")
                print(f"Error Details: {e}")

    except Exception as error:
        st.error(f"An error occurred: {str(error)}")
        print(f"Error Details: {error}")

# Invoke main() only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    main()