from flask import Flask, request, jsonify
import re
import json
from typing import Dict, Union

# Flask application object; the @app.route decorators in this file register
# their view functions on it.
app = Flask(__name__)

# ====================== COMMON UTILITY FUNCTIONS ======================
def check_keyword_exist(text: str, keyword: str) -> bool:
    """Tell whether at least one keyword occurs in the text.

    ``keyword`` may hold several comma-separated keywords. Each entry is
    trimmed and compared case-insensitively as a plain substring of
    ``text``; blank entries are ignored.

    Returns:
        True if any non-blank keyword is found, otherwise False. An empty
        ``text`` or ``keyword`` always yields False.
    """
    if not (text and keyword):
        return False

    candidates = [part.strip().lower() for part in keyword.split(',')]
    haystack = text.lower()
    for needle in candidates:
        # Skip entries that were empty or whitespace-only after trimming.
        if needle and needle in haystack:
            return True
    return False

def format_score(score: Union[int, float]) -> Union[int, float]:
    """Normalise a score for presentation in the API response.

    Floats holding a whole number are converted to ``int``; other floats are
    rounded to two decimals. Non-float values are returned unchanged.
    """
    if not isinstance(score, float):
        return score
    if score.is_integer():
        return int(score)
    return round(score, 2)

def calculate_keyword_density(text: str, keyword: str) -> float:
    """Compute keyword density as a percentage of the text's word count.

    ``keyword`` may hold several comma-separated keywords. Every entry is
    trimmed and lower-cased, blank entries are dropped, and the
    (case-insensitive, substring) occurrences of all remaining keywords are
    summed.

    Args:
        text: Plain text to analyse.
        keyword: One keyword or a comma-separated list of keywords.

    Returns:
        ``occurrences / word_count * 100``. Returns 0.0 when ``text`` or
        ``keyword`` is empty, or when ``keyword`` contains only blank entries.
    """
    if not text or not keyword:
        return 0.0

    # Trim and drop blank entries: str.count("") returns len(text) + 1, so a
    # stray comma would otherwise inflate the density enormously, and an
    # entry such as " tips" would miss occurrences not preceded by a space.
    keywords = [kw.strip().lower() for kw in keyword.split(',') if kw.strip()]
    if not keywords:
        return 0.0

    text_lower = text.lower()
    total_words = len(text.split())
    keyword_count = sum(text_lower.count(kw) for kw in keywords)
    # max(1, ...) guards against whitespace-only text (zero words).
    return (keyword_count / max(1, total_words)) * 100

# ====================== TITLE ANALYSIS ======================
def calculate_title_score(
    article_data: Dict[str, Union[str, Dict]]
) -> Dict[str, Union[int, float, Dict[str, str]]]:
    """Score the page title for SEO (10 points in total).

    Three checks are combined:
      * target keyword present in the title -> 6 points
      * target keyword position in the title -> up to 3 points
      * title length                         -> up to 1 point

    Args:
        article_data: Request payload. Reads ``'title'`` and
            ``'target-keyword'`` (one keyword or a comma-separated list).
            Missing or null values are treated as empty strings.

    Returns:
        A dict with ``keyword_exist_score``, ``keyword_position_score``,
        ``title_length_score``, ``title_total_score`` and a ``statuses`` dict
        holding 'Good' / 'Needs Improvement' / 'Bad' for ``keyword_exist``,
        ``keyword_position``, ``title_length`` and ``overall``.
    """
    # JSON null arrives as None; fall back to '' so the helpers below always
    # receive a string.
    title = article_data.get('title') or ''
    target_keyword = article_data.get('target-keyword') or ''

    # The position/length helpers return 1.0, 0.5 or 0.0; anything that is
    # not a full or half pass is reported as 'Bad'.
    status_by_ratio = {1.0: 'Good', 0.5: 'Needs Improvement'}

    # 1. Target keyword exists (weight: 6%)
    keyword_exist = check_keyword_exist(title, target_keyword)

    # 2. Target keyword position (weight: 3%). Only evaluated when the
    #    keyword is present; otherwise the check fails outright and is
    #    reported as 'Bad' rather than left blank.
    position_ratio = (
        check_keyword_position(title, target_keyword) if keyword_exist else 0.0
    )

    # 3. Title length (weight: 1%)
    length_ratio = check_title_length(title)

    title_scores = {
        'keyword_exist_score': 6 if keyword_exist else 0,
        'keyword_position_score': position_ratio * 3,
        'title_length_score': length_ratio * 1,
        'title_total_score': 0.0,
        'statuses': {
            'keyword_exist': 'Good' if keyword_exist else 'Bad',
            'keyword_position': status_by_ratio.get(position_ratio, 'Bad'),
            'title_length': status_by_ratio.get(length_ratio, 'Bad'),
            'overall': ''
        }
    }

    # Total title score
    title_scores['title_total_score'] = (
        float(title_scores['keyword_exist_score']) +
        title_scores['keyword_position_score'] +
        title_scores['title_length_score']
    )

    # Overall status, as a percentage of the 10 available points
    total_percentage = (title_scores['title_total_score'] / 10) * 100
    if total_percentage >= 80:
        title_scores['statuses']['overall'] = 'Good'
    elif total_percentage >= 50:
        title_scores['statuses']['overall'] = 'Needs Improvement'
    else:
        title_scores['statuses']['overall'] = 'Bad'

    return title_scores

def check_keyword_position(title: str, keyword: str) -> float:
    """Rate how early the primary keyword appears in the title.

    Only the first entry of a comma-separated ``keyword`` list is considered.
    The keyword may consist of several words; it is matched
    case-insensitively against consecutive whitespace-separated title words.
    A title word also matches when it equals the keyword word after
    surrounding punctuation is removed (e.g. ``"SEO:"`` matches ``"seo"``).

    Args:
        title: The page title.
        keyword: One keyword or a comma-separated list of keywords.

    Returns:
        1.0 if at most 2 words precede the keyword, 0.5 if 3 or 4 words
        precede it, and 0.0 otherwise (including when the keyword is not
        found or either argument is empty).
    """
    if not title or not keyword:
        return 0.0

    keyword_words = keyword.split(',')[0].lower().split()
    if not keyword_words:
        return 0.0
    title_words = title.lower().split()

    edge_punct = '.,:;!?"\'()[]{}|-'

    def word_matches(title_word: str, keyword_word: str) -> bool:
        # Exact match first so keywords that themselves contain punctuation
        # keep working; then tolerate punctuation attached to the title word.
        return (
            title_word == keyword_word
            or title_word.strip(edge_punct) == keyword_word
        )

    # Slide a window the size of the keyword phrase over the title so that
    # multi-word keywords can match (a single-token lookup never could).
    span = len(keyword_words)
    words_before = None
    for start in range(len(title_words) - span + 1):
        window = title_words[start:start + span]
        if all(word_matches(t, k) for t, k in zip(window, keyword_words)):
            words_before = start
            break

    if words_before is None:
        return 0.0
    if words_before <= 2:
        return 1.0
    if words_before <= 4:
        return 0.5
    return 0.0

def check_title_length(title: str) -> float:
    """Rate the title by its length in characters.

    Returns:
        1.0 for 75-95 characters, 0.5 for 40-74 or 96-120 characters, and
        0.0 otherwise. An empty or missing (``None``) title yields 0.0.
    """
    # Guard falsy input like the sibling helpers do, so a null title from the
    # request does not raise in len().
    if not title:
        return 0.0

    length = len(title)
    if 75 <= length <= 95:
        return 1.0
    # The ideal range was handled above, so this covers 40-74 and 96-120.
    if 40 <= length <= 120:
        return 0.5
    return 0.0

# ====================== META DESCRIPTION ANALYSIS ======================
def calculate_meta_desc_score(
    article_data: Dict[str, Union[str, Dict]]
) -> Dict[str, Union[float, Dict[str, str]]]:
    """Score the meta description for SEO (5 points in total).

    Three checks are combined:
      * target keyword present  -> 1 point
      * related keyword present -> 3.5 points
      * description length      -> up to 0.5 points

    Args:
        article_data: Request payload. Reads ``'meta_desc'``,
            ``'target-keyword'`` and ``'related-keyword'`` (keywords may be
            comma-separated lists). Missing or null values are treated as
            empty strings.

    Returns:
        A dict with ``keyword_exist_score``, ``related_keyword_score``,
        ``length_score``, ``meta_total_score`` and a ``statuses`` dict holding
        'Good' / 'Needs Improvement' / 'Bad' for ``keyword_exist``,
        ``related_keyword``, ``length`` and ``overall``.
    """
    # JSON null arrives as None; fall back to '' so the helpers below always
    # receive a string.
    meta_desc = article_data.get('meta_desc') or ''
    target_keyword = article_data.get('target-keyword') or ''
    related_keyword = article_data.get('related-keyword') or ''

    # 1. Target keyword exists (weight: 1%)
    keyword_exist = check_keyword_exist(meta_desc, target_keyword)

    # 2. Related keyword exists (weight: 3.5%). check_keyword_exist already
    #    returns False for an empty keyword, so no extra guard is needed.
    related_exist = check_keyword_exist(meta_desc, related_keyword)

    # 3. Meta description length (weight: 0.5%); the helper returns 1.0, 0.5
    #    or 0.0, which scales the weight to 0.5, 0.25 or 0.0.
    length_ratio = check_meta_desc_length(meta_desc)
    length_status = {1.0: 'Good', 0.5: 'Needs Improvement'}.get(length_ratio, 'Bad')

    meta_scores = {
        'keyword_exist_score': 1.0 if keyword_exist else 0.0,
        'related_keyword_score': 3.5 if related_exist else 0.0,
        'length_score': length_ratio * 0.5,
        'meta_total_score': 0.0,
        'statuses': {
            'keyword_exist': 'Good' if keyword_exist else 'Bad',
            'related_keyword': 'Good' if related_exist else 'Bad',
            'length': length_status,
            'overall': ''
        }
    }

    # Total meta description score
    meta_scores['meta_total_score'] = (
        meta_scores['keyword_exist_score'] +
        meta_scores['related_keyword_score'] +
        meta_scores['length_score']
    )

    # Overall status, as a percentage of the 5 available points
    total_percentage = (meta_scores['meta_total_score'] / 5) * 100
    if total_percentage >= 80:
        meta_scores['statuses']['overall'] = 'Good'
    elif total_percentage >= 50:
        meta_scores['statuses']['overall'] = 'Needs Improvement'
    else:
        meta_scores['statuses']['overall'] = 'Bad'

    return meta_scores

def check_meta_desc_length(meta_desc: str) -> float:
    """Rate the meta description by its length in characters.

    Returns:
        1.0 for 126-146 characters, 0.5 for 100-125 or 147-160 characters,
        and 0.0 otherwise. An empty or missing (``None``) description yields
        0.0.
    """
    # Guard falsy input so a null meta description from the request does not
    # raise in len().
    if not meta_desc:
        return 0.0

    length = len(meta_desc)
    if 126 <= length <= 146:
        return 1.0
    # The ideal range was handled above, so this covers 100-125 and 147-160.
    if 100 <= length <= 160:
        return 0.5
    return 0.0

# ====================== CONTENT ANALYSIS ======================
def calculate_content_score(
    article_data: Dict[str, Union[str, Dict]]
) -> Dict[str, Union[float, Dict[str, str]]]:
    """Score the article body for SEO.

    Checks and their weights:
      * word count                          -> 14.5 (half for 201-400 words)
      * target keyword in first paragraph   -> 1.7
      * target keyword in last paragraph    -> 1.7
      * target keyword in an image alt text -> 0.9
      * target keyword density              -> 14.9 (half above 5%)
      * related keyword density             -> 14.9 (half outside 1-2% but below 5%)
      * target keyword frequency            -> 25.5 (half for 1-2 occurrences)

    Args:
        article_data: Request payload. Reads ``'content'`` (HTML or plain
            text), ``'target-keyword'`` and ``'related-keyword'`` (keywords
            may be comma-separated lists). Missing or null values are treated
            as empty strings.

    Returns:
        A dict with one ``*_score`` entry per check, ``content_total_score``
        and a ``statuses`` dict holding 'Good' / 'Needs Improvement' / 'Bad'
        for every check plus ``overall``.
    """
    # JSON null arrives as None; fall back to '' so re.sub and the string
    # helpers below always receive a string.
    content = article_data.get('content') or ''
    target_keyword = article_data.get('target-keyword') or ''
    related_keyword = article_data.get('related-keyword') or ''

    # Every check starts out as failed ('Bad', 0 points) and is upgraded
    # below, so no status is ever reported as an empty string.
    content_scores = {
        'word_count_score': 0.0,
        'first_para_score': 0.0,
        'last_para_score': 0.0,
        'alt_image_score': 0.0,
        'keyword_density_score': 0.0,
        'related_keyword_density_score': 0.0,
        'keyword_frequency_score': 0.0,
        'content_total_score': 0.0,
        'statuses': {
            'word_count': 'Bad',
            'first_paragraph': 'Bad',
            'last_paragraph': 'Bad',
            'alt_image': 'Bad',
            'keyword_density': 'Bad',
            'related_keyword_density': 'Bad',
            'keyword_frequency': 'Bad',
            'overall': ''
        }
    }
    statuses = content_scores['statuses']

    # Clean HTML content: drop placeholder anchors (href="#") with their text.
    text_content = re.sub(r'<a href="#"[^>]*>.*?</a>', '', content)
    # Turn block-level boundaries into newlines before stripping tags.
    # Otherwise "<p>a</p><p>b</p>" collapses to "ab", which glues words
    # together and leaves HTML without literal newlines as one paragraph.
    text_content = re.sub(
        r'</(?:p|div|h[1-6]|li|blockquote|tr)\s*>|<br\s*/?>',
        '\n',
        text_content,
        flags=re.IGNORECASE,
    )
    text_content = re.sub('<[^<]+?>', '', text_content)
    paragraphs = [p.strip() for p in text_content.split('\n') if p.strip()]

    # 1. Word count (14.5%)
    word_count = len(text_content.split())
    if word_count > 400:
        content_scores['word_count_score'] = 14.5
        statuses['word_count'] = 'Good'
    elif word_count > 200:
        content_scores['word_count_score'] = 7.25
        statuses['word_count'] = 'Needs Improvement'

    # 2. Target keyword in first paragraph (1.7%)
    if paragraphs and check_keyword_exist(paragraphs[0], target_keyword):
        content_scores['first_para_score'] = 1.7
        statuses['first_paragraph'] = 'Good'

    # 3. Target keyword in last paragraph (1.7%)
    if paragraphs and check_keyword_exist(paragraphs[-1], target_keyword):
        content_scores['last_para_score'] = 1.7
        statuses['last_paragraph'] = 'Good'

    # 4. Target keyword in image alt text (0.9%); alt attributes are read
    #    from the raw HTML, before tags are stripped.
    alt_images = re.findall(r'alt=["\'](.*?)["\']', content)
    if any(check_keyword_exist(alt, target_keyword) for alt in alt_images):
        content_scores['alt_image_score'] = 0.9
        statuses['alt_image'] = 'Good'

    # 5. Keyword density (14.9%)
    keyword_density = calculate_keyword_density(text_content, target_keyword)
    if 2.5 <= keyword_density <= 5:
        content_scores['keyword_density_score'] = 14.9
        statuses['keyword_density'] = 'Good'
    elif keyword_density > 5:
        content_scores['keyword_density_score'] = 7.45
        statuses['keyword_density'] = 'Needs Improvement'

    # 6. Related keyword density (14.9%)
    # NOTE(review): a density below 1% -- including 0, i.e. the related
    # keyword never appears -- still earns half the points. Confirm this is
    # the intended rule.
    if related_keyword:
        related_density = calculate_keyword_density(text_content, related_keyword)
        if 1 <= related_density <= 2:
            content_scores['related_keyword_density_score'] = 14.9
            statuses['related_keyword_density'] = 'Good'
        elif related_density < 1 or (2 < related_density < 5):
            content_scores['related_keyword_density_score'] = 7.45
            statuses['related_keyword_density'] = 'Needs Improvement'

    # 7. Keyword frequency (25.5%)
    # Trim and drop blank entries first: str.count("") returns len(text) + 1,
    # so an empty keyword (or a stray comma) would otherwise be counted as
    # many "occurrences" and could even earn points on short texts.
    text_lower = text_content.lower()
    keywords = [kw.strip().lower() for kw in target_keyword.split(',') if kw.strip()]
    keyword_count = sum(text_lower.count(kw) for kw in keywords)
    if 3 <= keyword_count <= 6:
        content_scores['keyword_frequency_score'] = 25.5
        statuses['keyword_frequency'] = 'Good'
    elif 1 <= keyword_count <= 2:
        content_scores['keyword_frequency_score'] = 12.75
        statuses['keyword_frequency'] = 'Needs Improvement'

    # Total content score
    content_scores['content_total_score'] = sum([
        content_scores['word_count_score'],
        content_scores['first_para_score'],
        content_scores['last_para_score'],
        content_scores['alt_image_score'],
        content_scores['keyword_density_score'],
        content_scores['related_keyword_density_score'],
        content_scores['keyword_frequency_score']
    ])

    # Overall status, as a percentage of 85.
    # NOTE(review): the seven weights above add up to 74.1, not 85, so the
    # best reachable percentage here is about 87%. The remainder presumably
    # belongs to an internal-link check that is not implemented in this
    # function -- confirm the intended divisor.
    total_percentage = (content_scores['content_total_score'] / 85) * 100
    if total_percentage >= 80:
        statuses['overall'] = 'Good'
    elif total_percentage >= 50:
        statuses['overall'] = 'Needs Improvement'
    else:
        statuses['overall'] = 'Bad'

    return content_scores

# ====================== FLASK API ENDPOINTS ======================
@app.route('/analyze', methods=['POST'])
def analyze():
    """Main endpoint: run the title, meta description and content analyses.

    Expects a JSON object in the request body. The scoring functions read the
    keys ``title``, ``meta_desc``, ``content``, ``target-keyword`` and
    ``related-keyword``; each must be a string (or null/absent).

    Returns:
        200 with the per-section scores and statuses plus an overall score;
        400 when the body is missing, is not valid JSON, is not a JSON
        object, or contains a non-string value for one of the fields above;
        500 when the analysis itself fails unexpectedly.
    """
    # silent=True returns None for a missing/malformed body or a non-JSON
    # content type instead of raising; without it the resulting HTTP error
    # would be caught by the handler below and misreported as a 500.
    article_data = request.get_json(silent=True)

    if not article_data:
        return jsonify({"error": "No JSON data provided"}), 400

    # The scoring functions call .get() on the payload, so a JSON array or
    # scalar is a client error, not a server failure.
    if not isinstance(article_data, dict):
        return jsonify({"error": "JSON body must be an object"}), 400

    text_fields = ('title', 'meta_desc', 'content', 'target-keyword', 'related-keyword')
    invalid_fields = [
        field for field in text_fields
        if article_data.get(field) is not None
        and not isinstance(article_data[field], str)
    ]
    if invalid_fields:
        return jsonify({
            "error": "Fields must be strings: " + ", ".join(invalid_fields)
        }), 400

    def build_section(section_scores):
        # Expose only the numeric '*_score' entries alongside the statuses.
        return {
            "scores": {
                key: format_score(value)
                for key, value in section_scores.items()
                if key.endswith('_score')
            },
            "statuses": section_scores['statuses']
        }

    try:
        # Run all analyses
        title_scores = calculate_title_score(article_data)
        meta_scores = calculate_meta_desc_score(article_data)
        content_scores = calculate_content_score(article_data)

        # Overall score
        overall_score = (
            title_scores['title_total_score'] +
            meta_scores['meta_total_score'] +
            content_scores['content_total_score']
        )
        max_score = 10 + 5 + 85  # Nominal maximum of title + meta + content

        response = {
            "title_analysis": build_section(title_scores),
            "meta_analysis": build_section(meta_scores),
            "content_analysis": build_section(content_scores),
            "overall_score": {
                "score": format_score(overall_score),
                "max_score": max_score,
                "percentage": round((overall_score / max_score) * 100, 2)
            }
        }

        return jsonify(response)

    except Exception as e:
        # Top-level boundary: record the traceback, then report the failure.
        app.logger.exception("SEO analysis failed")
        return jsonify({"error": str(e)}), 500

@app.route('/')
def home():
    """Landing route: return a one-line usage hint for the API."""
    usage_hint = "SEO Analysis API - Send POST request to /analyze with article data"
    return usage_hint

if __name__ == '__main__':
    # Imported here so this entry point is self-contained.
    import os

    # Debug mode enables the Werkzeug interactive debugger, which lets anyone
    # who can reach the server execute arbitrary code. Keep it off by default
    # and opt in explicitly for local development: FLASK_DEBUG=1 python <file>
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in (
        '1', 'true', 'yes', 'on'
    )
    app.run(debug=debug_enabled)