import math
import re
import logging

class ArabicBubbleFormatter:
    """Split text into balanced lines suitable for a speech bubble.

    The formatter tries every line count from 1 to 6, distributes the words
    across that many lines according to ``target_ratios``, scores each
    candidate layout with ``_evaluate_line_distribution`` and keeps the
    highest-scoring one.  ``format_text`` is the public entry point.
    """

    def __init__(self):
        # Target share of the total character count for each line,
        # keyed by the number of lines in the layout.
        self.target_ratios = {
            2: [0.50, 0.50],  # even 50-50 split
            3: [0.30, 0.40, 0.30],  # widest line in the middle
            4: [0.20, 0.30, 0.30, 0.20],
            5: [0.15, 0.25, 0.30, 0.25, 0.15],
            6: [0.12, 0.18, 0.25, 0.25, 0.18, 0.12],
        }

        # Limits used while building and scoring lines.
        self.min_single_word_length = 6  # shortest word allowed alone on a line
        self.max_line_chars = 30  # hard limit on characters per line
        self.optimal_line_chars = 25  # preferred characters per line
        self.max_words_per_line = 8

        # Scoring scale.
        self.perfect_score = 100
        self.good_score = 85
        self.acceptable_score = 70
        self.poor_score = 50

    def _preprocess_text(self, text: str) -> str:
        """Collapse every whitespace run to one space and trim both ends."""
        return re.sub(r'\s+', ' ', text).strip()

    def _get_line_character_length(self, words: list) -> int:
        """Return the length of ``words`` joined by single spaces (0 if empty)."""
        if not words:
            return 0
        return sum(len(word) for word in words) + len(words) - 1

    def _calculate_total_characters(self, words: list) -> int:
        """Return the character count of the whole text, spaces included.

        Delegates to ``_get_line_character_length`` so that an empty word
        list yields 0 rather than -1.
        """
        return self._get_line_character_length(words)

    def _evaluate_line_distribution(self, lines: list) -> tuple:
        """Score a candidate layout.

        Args:
            lines: The lines of the layout, one string per line.

        Returns:
            A ``(score, details)`` tuple.  ``score`` starts at
            ``perfect_score`` and is adjusted by penalties and bonuses, so it
            may exceed ``perfect_score``; the final return clamps it at 0 from
            below.  ``details`` is a human-readable summary.
        """
        if not lines:
            return 0, "لا توجد أسطر"

        num_lines = len(lines)
        line_lengths = [len(line) for line in lines]
        total_chars = sum(line_lengths)

        score = self.perfect_score
        details = []

        # A short word alone on a line disqualifies the layout immediately.
        for i, line in enumerate(lines):
            words = line.split()
            if len(words) == 1 and len(words[0]) < self.min_single_word_length:
                score -= 80
                details.append(f"❌ السطر {i+1}: كلمة واحدة قصيرة ({len(words[0])} أحرف)")
                return score, f"فشل: {'; '.join(details)}"

        # Penalise lines that exceed the hard or the preferred length.
        for i, length in enumerate(line_lengths):
            if length > self.max_line_chars:
                score -= 20
                details.append(f"⚠️ السطر {i+1}: طويل جداً ({length} حرف)")
            elif length > self.optimal_line_chars:
                score -= 10
                details.append(f"⚠️ السطر {i+1}: أطول من المثالي ({length} حرف)")

        # Shape rules specific to each line count.
        if num_lines == 1:
            if total_chars <= self.max_line_chars:
                details.append("✅ سطر واحد مثالي للنص القصير")
                return score, f"ممتاز: {'; '.join(details)}"
            score -= 30
            details.append("⚠️ النص طويل للسطر الواحد")

        elif num_lines == 2:
            # Share of the first line; two empty lines count as balanced so
            # the division cannot fail.
            ratio1 = line_lengths[0] / total_chars if total_chars else 0.5

            if abs(ratio1 - 0.5) <= 0.01:  # within 1% of 50-50
                score += 10
                details.append("✅ توزيع مثالي 50-50")
            elif abs(ratio1 - 0.5) <= 0.05:  # within 5% of 50-50
                details.append("✅ توزيع جيد متقارب")
            elif abs(ratio1 - 0.55) <= 0.01 or abs(ratio1 - 0.45) <= 0.01:
                score -= 5
                details.append("✅ توزيع جيد 55-45")
            elif abs(ratio1 - 0.6) <= 0.05 or abs(ratio1 - 0.4) <= 0.05:
                # Roughly 60-40 in either order: the mirrored split is
                # scored the same as the original one.
                score -= 15
                details.append("⚠️ توزيع متوسط 60-40")
            else:
                score -= 25
                details.append("❌ توزيع غير متوازن")

        elif num_lines == 3:
            first, middle, last = line_lengths

            # The middle line should be the widest.
            if middle >= first and middle >= last:
                score += 15
                details.append("✅ السطر الأوسط هو الأكبر")

                # Outer lines should be equal or close in length.
                if abs(first - last) <= 2:
                    score += 15
                    details.append("✅ الأطراف متساوية")
                elif abs(first - last) <= 5:
                    score += 10
                    details.append("✅ الأطراف متقاربة")
                else:
                    score -= 10
                    details.append("⚠️ الأطراف مختلفة")
            else:
                score -= 20
                details.append("❌ السطر الأوسط ليس الأكبر")

        elif num_lines == 4:
            first, second, third, fourth = line_lengths
            longest = max(line_lengths)

            # Both inner lines should dominate their neighbouring outer line
            # and be within 80% of the longest line.
            if (second >= first and third >= fourth
                    and second >= 0.8 * longest and third >= 0.8 * longest):
                score += 15
                details.append("✅ السطران الأوسطان أكبر")

                if abs(first - fourth) <= 2:
                    score += 10
                    details.append("✅ الأطراف متساوية")

                if abs(second - third) <= 2:
                    score += 10
                    details.append("✅ الأوسطان متساويان")
            else:
                score -= 15
                details.append("❌ توزيع غير مثالي")

        elif num_lines == 5:
            first, second, third, fourth, fifth = line_lengths

            # The centre line should be (nearly) the longest.
            if third >= max(line_lengths) * 0.9:
                score += 10
                details.append("✅ السطر الأوسط هو الأكبر")

                if abs(first - fifth) <= 2:
                    score += 10
                    details.append("✅ الأطراف متساوية")

                # Lines 2 and 4 should match and be longer than the edges.
                if abs(second - fourth) <= 2 and second > first and fourth > fifth:
                    score += 15
                    details.append("✅ الثاني والرابع متساويان وأكبر من الأطراف")
            else:
                score -= 15
                details.append("❌ توزيع غير مثالي")

        elif num_lines == 6:
            first, second, third, fourth, fifth, sixth = line_lengths

            # The two centre lines should match and be (nearly) the longest.
            if abs(third - fourth) <= 2 and third >= max(line_lengths) * 0.9:
                score += 10
                details.append("✅ الأوسطان متساويان وأكبر")

                if abs(first - sixth) <= 2:
                    score += 10
                    details.append("✅ الأطراف متساوية")

                # Lines 2 and 5 should match and be longer than the edges.
                if abs(second - fifth) <= 2 and second > first and fifth > sixth:
                    score += 15
                    details.append("✅ الثاني والخامس متساويان وأكبر من الأطراف")
            else:
                score -= 15
                details.append("❌ توزيع غير مثالي")

        # Map the numeric score to a quality label.
        if score >= 95:
            quality = "ممتاز"
        elif score >= self.good_score:
            quality = "جيد جداً"
        elif score >= self.acceptable_score:
            quality = "جيد"
        elif score >= self.poor_score:
            quality = "متوسط"
        else:
            quality = "ضعيف"

        return max(0, score), f"{quality}: {'; '.join(details)}"

    def _distribute_words_optimally(self, words: list, num_lines: int) -> list:
        """Greedily split ``words`` into about ``num_lines`` lines.

        For each target line the method tries every admissible number of
        words and keeps the candidate whose length is closest to the target
        derived from ``target_ratios`` (after heuristic adjustments).  Line
        counts without a ratio table fall back to ``_simple_distribution``.

        Args:
            words: The words of the text, in order.
            num_lines: Desired number of lines.

        Returns:
            The lines as strings.  The result may contain fewer or more lines
            than requested: the loop stops early when words run out, and
            leftover words may be appended as an extra line.
        """
        if num_lines == 1:
            return [" ".join(words)]

        if num_lines not in self.target_ratios:
            return self._simple_distribution(words, num_lines)

        ratios = self.target_ratios[num_lines]
        total_chars = self._calculate_total_characters(words)
        n = len(words)

        target_lengths = [int(total_chars * ratio) for ratio in ratios]

        result_lines = []
        current_index = 0
        # Length of the second line actually chosen in a four-line layout.
        # Kept local so nothing leaks between candidates or between calls.
        second_line_length = None

        for i, target_len in enumerate(target_lengths):
            if current_index >= n:
                break

            # Fallback when no candidate passes the filters: take one word.
            best_end = current_index + 1
            best_diff = float('inf')

            last_end = min(n, current_index + self.max_words_per_line)

            for end in range(current_index + 1, last_end + 1):
                line_words = words[current_index:end]
                line_length = self._get_line_character_length(line_words)

                # Never leave a short word alone on a line.
                if len(line_words) == 1 and len(line_words[0]) < self.min_single_word_length:
                    continue

                # Reject lines over the hard limit.
                if line_length > self.max_line_chars:
                    continue

                diff = abs(line_length - target_len)

                # Four-line layouts: favour wide inner lines and narrow edges.
                if num_lines == 4:
                    if i == 1 or i == 2:
                        if line_length >= target_len:
                            diff -= 20
                        # Extra bonus when line 3 matches the chosen line 2.
                        if (i == 2 and second_line_length is not None
                                and abs(line_length - second_line_length) <= 2):
                            diff -= 15
                    elif line_length <= target_len:  # i is 0 or 3 (outer lines)
                        diff -= 15

                # Favour lines within the preferred length.
                if line_length <= self.optimal_line_chars:
                    diff -= 10

                # On the last line, heavily penalise leaving words behind.
                if i == num_lines - 1:
                    remaining_words = n - end
                    if remaining_words > 0:
                        diff += remaining_words * 50

                if diff < best_diff:
                    best_diff = diff
                    best_end = end

            chosen_words = words[current_index:best_end]
            result_lines.append(" ".join(chosen_words))
            if i == 1:
                second_line_length = self._get_line_character_length(chosen_words)
            current_index = best_end

        # Place any words the loop did not consume.
        if current_index < n:
            remaining_words = words[current_index:]
            if result_lines and len(remaining_words) <= 2:
                # Merge a short tail into the last line when it still fits.
                last_line_words = result_lines[-1].split() + remaining_words
                if len(" ".join(last_line_words)) <= self.max_line_chars:
                    result_lines[-1] = " ".join(last_line_words)
                else:
                    result_lines.append(" ".join(remaining_words))
            else:
                result_lines.append(" ".join(remaining_words))

        return result_lines

    def _simple_distribution(self, words: list, num_lines: int) -> list:
        """Fallback split that puts an equal number of words on each line.

        The first ``len(words) % num_lines`` lines receive one extra word.
        Empty lines are dropped.  A non-positive ``num_lines`` yields an
        empty list instead of dividing by zero.
        """
        if num_lines <= 0:
            return []

        words_per_line, remainder = divmod(len(words), num_lines)

        result = []
        index = 0
        for i in range(num_lines):
            line_len = words_per_line + (1 if i < remainder else 0)
            line_words = words[index:index + line_len]
            index += line_len
            if line_words:
                result.append(" ".join(line_words))

        return result

    def _find_optimal_formatting(self, words: list) -> tuple:
        """Try 1 to 6 lines and return the best-scoring layout.

        Args:
            words: The words of the text, in order.

        Returns:
            A ``(lines, score, details)`` tuple for the winning layout.
        """
        n = len(words)
        total_chars = self._calculate_total_characters(words)

        # A single word is always kept on one line.
        if n == 1:
            return [words[0]], self.perfect_score, "كلمة واحدة"

        # Very short text is always kept on one line.
        if total_chars <= 25:
            return [" ".join(words)], self.perfect_score, "نص قصير - سطر واحد"

        best_result = None
        best_score = 0
        best_details = ""

        # Never ask for more lines than there are words.
        max_lines = min(6, n)

        for num_lines in range(1, max_lines + 1):
            lines = self._distribute_words_optimally(words, num_lines)
            score, details = self._evaluate_line_distribution(lines)

            # Prefer fewer lines for shorter text.
            if total_chars <= 50 and num_lines <= 2:
                score += 10
            elif total_chars <= 80 and num_lines <= 3:
                score += 5

            # Prefer three lines for medium-length text.
            if 50 <= total_chars <= 120 and num_lines == 3:
                score += 15

            # Keep the first candidate even if it scores 0, so a layout is
            # always returned; ties keep the earlier (fewer-lines) candidate.
            if best_result is None or score > best_score:
                best_score = score
                best_result = lines
                best_details = details

        return best_result, best_score, best_details

    def format_text(self, text: str) -> dict:
        """Format ``text`` into bubble lines and report the layout quality.

        Args:
            text: The raw text to format.

        Returns:
            A dict with the keys ``formatted`` (lines joined by newlines),
            ``score``, ``details``, ``lines_count``, ``original_length``
            (length of the whitespace-normalised text) and ``lines_info``
            (one dict per line with ``text``, ``length`` and ``words``).
            The same keys are present for blank input and when formatting
            fails; in those cases ``formatted`` is the input unchanged and
            ``score`` is 0.  Errors are logged rather than raised.
        """
        try:
            if not text.strip():
                return {
                    "formatted": text,
                    "score": 0,
                    "details": "نص فارغ",
                    "lines_count": 0,
                    "original_length": 0,
                    "lines_info": [],
                }

            processed_text = self._preprocess_text(text)
            words = processed_text.split()

            best_lines, score, details = self._find_optimal_formatting(words)

            if best_lines:
                formatted_text = "\n".join(best_lines)
            else:
                formatted_text = processed_text

            return {
                "formatted": formatted_text,
                "score": score,
                "details": details,
                "lines_count": len(best_lines) if best_lines else 1,
                "original_length": len(processed_text),
                "lines_info": [
                    {
                        "text": line,
                        "length": len(line),
                        "words": len(line.split()),
                    }
                    for line in best_lines
                ] if best_lines else [],
            }
        except Exception as e:
            # Best-effort API: report the failure in the result instead of
            # propagating it to the caller.
            logging.error("Bubble formatting error: %s", e)
            return {
                "formatted": text,
                "score": 0,
                "details": f"Formatting failed: {str(e)}",
                "lines_count": 0,
                "original_length": len(text) if isinstance(text, str) else 0,
                "lines_info": [],
            }

# # اختبار مع أمثلة متنوعة
# if __name__ == "__main__":
#     formatter = ArabicBubbleFormatter()
    
#     # أمثلة للاختبار
#     test_cases = [
#         # نصوص قصيرة - سطر واحد
#         "مرحبا",
#         "صباح الخير",
#         "كيف حالك اليوم؟",
        
#         # نصوص متوسطة - سطرين
#         "هذا نص متوسط الطول يناسب السطرين",
#         "النص القصير نسبياً للتجربة",
        
#         # نصوص مثالية للثلاثة أسطر
#         "هذا نص طويل نسبياً يحتوي على عدة كلمات ومناسب للتوزيع على ثلاثة أسطر",
#         "النص المتوسط الطول يبدو أفضل عندما يتم توزيعه على ثلاثة أسطر بشكل جميل",
        
#         # النصوص الإشكالية من الأصل
#         "هذا! ضربة أقوى بمئة مرة! هذا الرجل، إنه حقا من عـشـيرة الذهب",
#         "لا أذكر، فقد مضى وقت طويل.",
#         "هذا إنذاري الأخير.",
#         "أمنحك فرصة ثالثة لسماحتي.",
        
#         # نصوص طويلة
#         "هذا نص طويل جداً يحتوي على كلمات كثيرة ومناسب للتوزيع على أربعة أو خمسة أسطر حسب الحاجة والتوزيع الأمثل",
#         "النصوص الطويلة تحتاج إلى تنسيق خاص لتبدو جميلة ومتوازنة في الفقاعات وهذا مثال على نص طويل نسبياً"
#     ]
    
#     print("=" * 100)
#     print("نتائج التنسيق مع نظام التقييم المحسن")
#     print("=" * 100)
    
#     for i, text in enumerate(test_cases, 1):
#         result = formatter.format_text(text)
        
#         print(f"\n{i}. النص الأصلي: {text}")
#         print(f"الطول: {result['original_length']} حرف")
#         print("\nالنتيجة:")
#         print(result['formatted'])
#         print(f"\nالتقييم: {result['score']:.1f}/100")
#         print(f"التفاصيل: {result['details']}")
#         print(f"عدد الأسطر: {result['lines_count']}")
        
#         # تفاصيل كل سطر
#         for j, line_info in enumerate(result['lines_info'], 1):
#             print(f"السطر {j}: {line_info['length']} حرف، {line_info['words']} كلمة")
        
#         # رمز الجودة
#         score = result['score']
#         if score >= 95:
#             print("🏆 تنسيق ممتاز!")
#         elif score >= 85:
#             print("✨ تنسيق جيد جداً!")
#         elif score >= 70:
#             print("✅ تنسيق جيد")
#         elif score >= 50:
#             print("⚠️ تنسيق متوسط")
#         else:
#             print("❌ تنسيق ضعيف")
        
#         print("-" * 60)