"""Streamlit resume-analyzer app.

Uploads PDF/DOCX resumes, extracts their text, asks the OpenAI chat API to
pull out structured candidate data, persists results through ResumeDatabase,
and renders statistics / rankings / export pages.
"""

import ast
import os
import tempfile
from datetime import datetime

import streamlit as st
import pandas as pd
import docx2txt

# Try importing Document from python-docx, but don't fail if not available;
# it is only used as a fallback DOCX parser.
try:
    from docx import Document
    DOCX_AVAILABLE = True
except ImportError:
    DOCX_AVAILABLE = False

from PyPDF2 import PdfReader
import openai
from dotenv import load_dotenv
import plotly.express as px
import plotly.graph_objects as go
from database import ResumeDatabase

# Load environment variables (expects OPENAI_API_KEY in a .env file)
load_dotenv()

# Initialize OpenAI API key and database
openai.api_key = os.getenv('OPENAI_API_KEY')
db = ResumeDatabase()


def extract_text_from_pdf(file):
    """Extract text from a PDF file.

    Args:
        file: A path or file-like object accepted by PyPDF2's PdfReader.

    Returns:
        The concatenated text of all pages, or None on failure (an error
        is shown in the Streamlit UI).
    """
    try:
        pdf_reader = PdfReader(file)
        text = ""
        for page in pdf_reader.pages:
            text += page.extract_text()
        return text
    except Exception as e:
        st.error(f"Error extracting text from PDF: {str(e)}")
        return None


def extract_text_from_docx(file):
    """Extract text from a DOCX file.

    Tries docx2txt first; if that fails and python-docx is installed,
    falls back to reading paragraphs via python-docx.

    Args:
        file: A path or file-like object for the DOCX document.

    Returns:
        The extracted text, or None on failure (error shown in the UI).
    """
    try:
        # Try using docx2txt first
        try:
            text = docx2txt.process(file)
            return text
        except Exception:
            # If docx2txt fails and python-docx is available, try that
            if DOCX_AVAILABLE:
                doc = Document(file)
                text = ""
                for paragraph in doc.paragraphs:
                    text += paragraph.text + "\n"
                return text
            else:
                st.error("Could not process DOCX file. Please ensure python-docx is installed.")
                return None
    except Exception as e:
        st.error(f"Error extracting text from DOCX: {str(e)}")
        return None


def analyze_resume(text):
    """Analyze resume text using the OpenAI API.

    Sends the resume text to gpt-3.5-turbo with a system prompt that asks
    for a Python-dict-shaped response, parses that response safely, and
    back-fills defaults for any missing fields.

    Args:
        text: Raw resume text.

    Returns:
        A dict of extracted candidate fields, or None on failure
        (error shown in the UI).
    """
    try:
        system_prompt = """You are a professional resume analyzer specializing in Data Science and Data Engineering roles. Analyze the following resume and extract key information in a structured format.
Return ONLY a Python dictionary with the following keys (no other text): {
"name": "extracted name",
"email": "extracted email",
"phone": "extracted phone",
"location": "extracted location",
"linkedin_url": "extracted LinkedIn URL",
"github_url": "extracted GitHub URL",
"portfolio_url": "extracted portfolio URL",
"cgpa": "extracted CGPA",
"years_experience": "extracted years of experience as a number",
"education_level": "highest education level",
"major": "extracted major/field of study",
"university": "extracted university name",
"internships": ["list of internships"],
"programming_languages": ["list of programming languages"],
"technical_skills": ["list of technical skills"],
"job_titles": ["list of job titles"],
"certifications": ["list of certifications"],
"ml_frameworks": ["list of ML frameworks"],
"visualization_tools": ["list of visualization tools"],
"statistical_tools": ["list of statistical tools"],
"big_data_tools": ["list of big data tools"],
"cloud_platforms": ["list of cloud platforms"],
"deep_learning": ["list of deep learning skills"],
"nlp_skills": ["list of NLP skills"],
"computer_vision": ["list of computer vision skills"],
"databases": ["list of databases"],
"etl_tools": ["list of ETL tools"],
"data_warehousing": ["list of data warehousing tools"],
"orchestration_tools": ["list of orchestration tools"],
"streaming_tech": ["list of streaming technologies"],
"data_modeling": ["list of data modeling skills"],
"data_governance": ["list of data governance experience"],
"data_quality_tools": ["list of data quality tools"],
"projects": ["list of projects"],
"publications": ["list of publications"],
"research_experience": "yes/no or details of research experience",
"hackathons": ["list of hackathons"],
"awards": ["list of awards"],
"soft_skills": ["list of soft skills"],
"domain_expertise": ["list of domain expertise"],
"languages": ["list of languages"],
"leadership_experience": "yes/no or details of leadership experience",
"team_size": "number of people managed",
"code_quality": ["list of code quality metrics"],
"project_impact": ["list of project impact metrics"],
"performance_improvements": ["list of performance improvements"],
"version_control": ["list of version control systems"],
"ci_cd_tools": ["list of CI/CD tools"],
"testing_frameworks": ["list of testing frameworks"],
"agile_experience": "yes/no or details of agile experience",
"system_architecture": ["list of system architecture experience"],
"business_domain": ["list of business domains"],
"industry_certifications": ["list of industry certifications"],
"domain_tools": ["list of domain-specific tools"],
"compliance_knowledge": ["list of compliance knowledge"],
"confidence_score": 0.95
}
For any field where information is not found:
- Use "Not found" for string fields
- Use [] for list fields
- Use 0 for numeric fields
- Use "no" for yes/no fields
Ensure all fields are included in the response, even if empty."""

        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": text}
            ],
            temperature=0.2
        )

        # Get the response content
        result_str = response['choices'][0]['message']['content'].strip()

        # Clean up the response string to ensure it's valid Python dict syntax
        result_str = result_str.replace('```python', '').replace('```', '').strip()

        # SECURITY: the model's output is untrusted input; eval() would let it
        # execute arbitrary code. ast.literal_eval only accepts Python literals
        # (dicts, lists, strings, numbers), which is exactly the expected shape.
        result = ast.literal_eval(result_str)

        # Ensure all required fields are present with default values
        required_fields = {
            'name': 'Not found',
            'email': 'Not found',
            'phone': 'Not found',
            'location': 'Not found',
            'linkedin_url': 'Not found',
            'github_url': 'Not found',
            'portfolio_url': 'Not found',
            'cgpa': 'Not found',
            'years_experience': 0,
            'education_level': 'Not found',
            'major': 'Not found',
            'university': 'Not found',
            'programming_languages': [],
            'technical_skills': [],
            'job_titles': [],
            'certifications': [],
            'ml_frameworks': [],
            'visualization_tools': [],
            'projects': [],
            'publications': [],
            'research_experience': 'no',
            'awards': [],
            'leadership_experience': 'no',
            'team_size': 0
        }

        # Update with default values for missing fields; coerce scalars that
        # should have been lists into single-element lists.
        for field, default_value in required_fields.items():
            if field not in result or result[field] is None:
                result[field] = default_value
            elif isinstance(default_value, list) and not isinstance(result[field], list):
                result[field] = [result[field]] if result[field] != "Not found" else []

        return result
    except Exception as e:
        st.error(f"Error analyzing resume: {str(e)}")
        return None


def display_statistics():
    """Display statistics and visualizations of the analyzed resumes."""
    stats = db.get_statistics()

    st.subheader("📊 Resume Analysis Statistics")

    # Basic stats in three columns
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("Total Resumes Analyzed", stats['total_resumes'])
    with col2:
        st.metric("Average Years of Experience", f"{stats['avg_work_experience']:.1f}")
    with col3:
        st.metric("Universities Represented", len(stats['university_distribution']))

    # Education Distribution
    st.subheader("🎓 Education Statistics")
    col1, col2 = st.columns(2)
    with col1:
        if stats['education_levels']:
            fig = px.pie(
                values=list(stats['education_levels'].values()),
                names=list(stats['education_levels'].keys()),
                title="Education Levels"
            )
            st.plotly_chart(fig, use_container_width=True)
    with col2:
        if stats['major_distribution']:
            fig = px.pie(
                values=list(stats['major_distribution'].values()),
                names=list(stats['major_distribution'].keys()),
                title="Major Distribution"
            )
            st.plotly_chart(fig, use_container_width=True)

    # Technical Skills Section
    st.subheader("💻 Technical Expertise")

    # Programming Languages and Technical Skills
    col1, col2 = st.columns(2)
    with col1:
        if stats['top_programming_languages']:
            fig = px.bar(
                x=list(stats['top_programming_languages'].keys()),
                y=list(stats['top_programming_languages'].values()),
                title="Top Programming Languages"
            )
            st.plotly_chart(fig, use_container_width=True)
    with col2:
        if stats['top_technical_skills']:
            fig = px.bar(
                x=list(stats['top_technical_skills'].keys()),
                y=list(stats['top_technical_skills'].values()),
                title="Top Technical Skills"
            )
            st.plotly_chart(fig, use_container_width=True)

    # Data Science Specific Skills
    st.subheader("🔬 Data Science Expertise")
    col1, col2 = st.columns(2)
    with col1:
        if stats['top_ml_frameworks']:
            fig = px.bar(
                x=list(stats['top_ml_frameworks'].keys()),
                y=list(stats['top_ml_frameworks'].values()),
                title="Top ML Frameworks"
            )
            st.plotly_chart(fig, use_container_width=True)
    with col2:
        if stats['top_visualization_tools']:
            fig = px.bar(
                x=list(stats['top_visualization_tools'].keys()),
                y=list(stats['top_visualization_tools'].values()),
                title="Top Visualization Tools"
            )
            st.plotly_chart(fig, use_container_width=True)

    # Data Engineering Specific Skills
    st.subheader("⚙️ Data Engineering Expertise")
    col1, col2, col3 = st.columns(3)
    with col1:
        if stats['top_databases']:
            fig = px.bar(
                x=list(stats['top_databases'].keys()),
                y=list(stats['top_databases'].values()),
                title="Top Databases"
            )
            st.plotly_chart(fig, use_container_width=True)
    with col2:
        if stats['top_etl_tools']:
            fig = px.bar(
                x=list(stats['top_etl_tools'].keys()),
                y=list(stats['top_etl_tools'].values()),
                title="Top ETL Tools"
            )
            st.plotly_chart(fig, use_container_width=True)
    with col3:
        if stats['top_streaming_tech']:
            fig = px.bar(
                x=list(stats['top_streaming_tech'].keys()),
                y=list(stats['top_streaming_tech'].values()),
                title="Top Streaming Technologies"
            )
            st.plotly_chart(fig, use_container_width=True)

    # Cloud & Big Data
    st.subheader("☁️ Cloud & Big Data Expertise")
    col1, col2 = st.columns(2)
    with col1:
        if stats['top_cloud_platforms']:
            fig = px.bar(
                x=list(stats['top_cloud_platforms'].keys()),
                y=list(stats['top_cloud_platforms'].values()),
                title="Top Cloud Platforms"
            )
            st.plotly_chart(fig, use_container_width=True)
    with col2:
        if stats['top_certifications']:
            fig = px.bar(
                x=list(stats['top_certifications'].keys()),
                y=list(stats['top_certifications'].values()),
                title="Top Certifications"
            )
            st.plotly_chart(fig, use_container_width=True)


def display_rankings():
    """Display candidate rankings with filtering options."""
    st.subheader("🏆 Candidate Rankings")

    # Role selection
    role_type = st.selectbox(
        "Select Role Type",
        ["both", "data_science", "data_engineering"],
        format_func=lambda x: {
            "both": "Both Roles",
            "data_science": "Data Science",
            "data_engineering": "Data Engineering"
        }[x]
    )

    # Minimum score filter
    min_score = st.slider("Minimum Score", 0, 100, 50)

    # Get rankings
    rankings = db.get_candidate_rankings(role_type, min_score)

    if not rankings:
        st.warning("No candidates found matching the criteria.")
        return

    # Display top candidates
    st.write(f"Found {len(rankings)} candidates matching the criteria")

    for i, candidate in enumerate(rankings, 1):
        with st.expander(f"#{i}: {candidate['name']} - Score: {candidate['total_score']:.1f}"):
            col1, col2 = st.columns(2)
            with col1:
                st.write("📊 Score Breakdown")
                fig = go.Figure()
                scores = [
                    ('Education', candidate['education_score']),
                    ('Experience', candidate['experience_score']),
                    ('Technical', candidate['technical_score']),
                    ('Projects', candidate['project_score']),
                    ('Impact', candidate['impact_score']),
                    ('Role Specific', candidate['role_specific_score'])
                ]
                fig.add_trace(go.Bar(
                    x=[s[0] for s in scores],
                    y=[s[1] for s in scores],
                    text=[f"{s[1]:.1f}" for s in scores],
                    textposition='auto',
                ))
                fig.update_layout(
                    title="Score Components",
                    showlegend=False,
                    height=300
                )
                st.plotly_chart(fig, use_container_width=True)
            with col2:
                st.write("👤 Candidate Information")
                st.write(f"Email: {candidate['email']}")
                st.write(f"Experience: {candidate['years_experience']}")
                st.write(f"Education: {candidate['education_level']}")
                st.write("Key Skills:")
                for skill in candidate['key_skills']:
                    st.write(f"- {skill}")


def main():
    """Entry point: route between the upload, statistics, rankings and export pages."""
    st.title("Resume Analyzer")

    # Sidebar navigation
    page = st.sidebar.selectbox(
        "Choose a page",
        ["Upload Resume", "View Statistics", "View Rankings", "Export Data"]
    )

    if page == "Upload Resume":
        st.write("Upload resumes in PDF or DOCX format for analysis")

        # Check for API key
        if not os.getenv('OPENAI_API_KEY'):
            st.error("Please set your OpenAI API key in the .env file")
            return

        uploaded_file = st.file_uploader(
            "Choose a resume file",
            type=['pdf', 'docx'],
            help="Upload a resume in PDF or DOCX format"
        )

        if uploaded_file:
            with st.spinner("Processing resume..."):
                # Create a temporary file so the extractors can read from a path
                with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
                    tmp_file.write(uploaded_file.getvalue())
                    tmp_file_path = tmp_file.name

                # Extract text based on file type; initialize so an unexpected
                # extension cannot raise NameError below.
                text = None
                file_extension = uploaded_file.name.split('.')[-1].lower()
                if file_extension == 'pdf':
                    text = extract_text_from_pdf(tmp_file_path)
                elif file_extension == 'docx':
                    text = extract_text_from_docx(tmp_file_path)

                # Clean up temporary file
                os.unlink(tmp_file_path)

                if text:
                    # Analyze the resume
                    analysis_result = analyze_resume(text)
                    if analysis_result:
                        # Save to database
                        db.save_analysis(analysis_result, text)
                        st.success("Resume analyzed successfully!")

                        # Calculate and display score
                        scores = db.calculate_score(analysis_result)
                        st.subheader("📊 Candidate Score")
                        col1, col2, col3 = st.columns(3)
                        with col1:
                            st.metric("Total Score", f"{scores['total_score']:.1f}/100")
                        with col2:
                            st.metric("Technical Score", f"{scores['technical_score']:.1f}/20")
                        with col3:
                            st.metric("Experience Score", f"{scores['experience_score']:.1f}/20")

                        # Display detailed scores
                        fig = go.Figure()
                        score_components = [
                            ('Education', scores['education_score'], 20),
                            ('Experience', scores['experience_score'], 20),
                            ('Technical', scores['technical_score'], 20),
                            ('Projects', scores['project_score'], 15),
                            ('Impact', scores['impact_score'], 15),
                            ('Role Specific', scores['role_specific_score'], 10)
                        ]
                        fig.add_trace(go.Bar(
                            name='Score',
                            x=[s[0] for s in score_components],
                            y=[s[1] for s in score_components],
                            text=[f"{s[1]:.1f}/{s[2]}" for s in score_components],
                            textposition='auto',
                        ))
                        fig.update_layout(
                            title="Score Breakdown",
                            yaxis_title="Points",
                            showlegend=False
                        )
                        st.plotly_chart(fig, use_container_width=True)

                        # Display analysis results
                        st.subheader("Analysis Results")
                        col1, col2 = st.columns(2)
                        with col1:
                            st.write("📚 Education")
                            st.write(f"CGPA: {analysis_result.get('cgpa', 'Not found')}")
                            st.write(f"Education Level: {analysis_result.get('education_level', 'Not found')}")
                            st.write(f"Major: {analysis_result.get('major', 'Not found')}")
                            st.write(f"University: {analysis_result.get('university', 'Not found')}")

                            st.write("💼 Experience")
                            st.write(f"Years of Experience: {analysis_result.get('years_experience', 'Not found')}")
                            st.write("Job Titles:")
                            job_titles = analysis_result.get('job_titles', [])
                            if job_titles:
                                for title in job_titles:
                                    st.write(f"- {title}")
                            else:
                                st.write("- Not found")
                        with col2:
                            st.write("🔧 Technical Skills")

                            # Programming Languages
                            st.write("Programming Languages:")
                            prog_langs = analysis_result.get('programming_languages', [])
                            if prog_langs:
                                for lang in prog_langs:
                                    st.write(f"- {lang}")
                            else:
                                st.write("- Not found")

                            # Data Science Skills
                            st.write("Data Science Skills:")
                            ds_skills = (
                                analysis_result.get('ml_frameworks', []) +
                                analysis_result.get('deep_learning', []) +
                                analysis_result.get('nlp_skills', []) +
                                analysis_result.get('computer_vision', []) +
                                analysis_result.get('statistical_tools', []) +
                                analysis_result.get('visualization_tools', [])
                            )
                            if ds_skills:
                                for skill in ds_skills:
                                    st.write(f"- {skill}")
                            else:
                                st.write("- Not found")

                            # Data Engineering Skills
                            st.write("Data Engineering Skills:")
                            de_skills = (
                                analysis_result.get('databases', []) +
                                analysis_result.get('etl_tools', []) +
                                analysis_result.get('data_warehousing', []) +
                                analysis_result.get('orchestration_tools', []) +
                                analysis_result.get('streaming_tech', []) +
                                analysis_result.get('data_modeling', [])
                            )
                            if de_skills:
                                for skill in de_skills:
                                    st.write(f"- {skill}")
                            else:
                                st.write("- Not found")

                            # Cloud & Tools
                            st.write("Cloud & Tools:")
                            cloud_tools = (
                                analysis_result.get('cloud_platforms', []) +
                                analysis_result.get('ci_cd_tools', []) +
                                analysis_result.get('version_control', [])
                            )
                            if cloud_tools:
                                for tool in cloud_tools:
                                    st.write(f"- {tool}")
                            else:
                                st.write("- Not found")

                        # Additional Information
                        st.write("📜 Additional Information")
                        col3, col4 = st.columns(2)
                        with col3:
                            # Certifications
                            st.write("Certifications:")
                            certs = analysis_result.get('certifications', [])
                            if certs:
                                for cert in certs:
                                    st.write(f"- {cert}")
                            else:
                                st.write("- Not found")

                            # Projects
                            st.write("Projects:")
                            projects = analysis_result.get('projects', [])
                            if projects:
                                for project in projects:
                                    st.write(f"- {project}")
                            else:
                                st.write("- Not found")
                        with col4:
                            # Publications & Research
                            st.write("Publications & Research:")
                            publications = analysis_result.get('publications', [])
                            if publications:
                                for pub in publications:
                                    st.write(f"- {pub}")
                            else:
                                st.write("- Not found")

                            # Coerce to str: the model may return a non-string here.
                            research_exp = analysis_result.get('research_experience', 'no')
                            if str(research_exp).lower() != 'no':
                                st.write("Research Experience:", research_exp)

                            # Leadership & Team Size
                            leadership = analysis_result.get('leadership_experience', 'no')
                            if str(leadership).lower() != 'no':
                                st.write("Leadership Experience:", leadership)
                                team_size = analysis_result.get('team_size', 0)
                                if team_size:
                                    st.write(f"Team Size Managed: {team_size}")

    elif page == "View Statistics":
        display_statistics()
    elif page == "View Rankings":
        display_rankings()
    else:
        # Export Data page
        st.subheader("Export Data")
        col1, col2 = st.columns(2)
        with col1:
            if st.button("Export to CSV"):
                csv_path = db.export_to_csv()
                with open(csv_path, 'rb') as f:
                    st.download_button(
                        label="Download CSV",
                        data=f,
                        file_name="resume_analyses.csv",
                        mime="text/csv"
                    )
        with col2:
            if st.button("Export to JSON"):
                json_path = db.export_to_json()
                with open(json_path, 'rb') as f:
                    st.download_button(
                        label="Download JSON",
                        data=f,
                        file_name="resume_analyses.json",
                        mime="application/json"
                    )


if __name__ == "__main__":
    main()