Spaces:

humy65
/

schooly-intent-code

Sleeping

App Files Files Community

humy65 committed on Aug 12, 2025

Commit

f8fab2a

1 Parent(s): 80fe507

Add training data display to debug tab

Browse files

Files changed (1) hide show

app.py +199 -52

app.py CHANGED Viewed

@@ -6,82 +6,87 @@ import gradio as gr
 import sys
 import traceback
 def test_model_loading():
     """Attempt to load the intent classifier's tokenizer and model.

     Both objects are fetched with ``from_pretrained`` for the repository id
     ``humy65/hebrew-intent-classifier``; progress is printed along the way.

     Returns:
         A 4-tuple ``(success, message, model, tokenizer)``. When loading
         works, ``success`` is ``True`` and ``message`` is a short
         confirmation. When anything raises, ``success`` is ``False``,
         ``message`` holds the error text followed by the formatted
         traceback, and ``model`` / ``tokenizer`` are both ``None``.
     """
     repo_id = "humy65/hebrew-intent-classifier"
     try:
         print("🔄 Testing model loading...")
         # Imported inside the try so that an import failure is also reported
         # through the except branch instead of propagating to the caller.
         from transformers import AutoTokenizer, AutoModelForSequenceClassification
         print(f"📡 Attempting to load: {repo_id}")
         loaded_tokenizer = AutoTokenizer.from_pretrained(repo_id)
         print("✅ Tokenizer loaded")
         loaded_model = AutoModelForSequenceClassification.from_pretrained(repo_id)
         print("✅ Model loaded")
         print(f"📋 Labels: {loaded_model.config.id2label}")
     except Exception as exc:
         failure_report = (
             f"❌ Error: {exc!s}\n\nTraceback:\n{traceback.format_exc()}"
         )
         print(failure_report)
         return False, failure_report, None, None
     else:
         return True, "Model loaded successfully!", loaded_model, loaded_tokenizer
 def classify_text(text):
     """Classify ``text`` and report the predicted intent with per-label scores.

     The model and tokenizer are obtained by calling ``test_model_loading()``
     on every invocation; nothing is cached inside this function.

     Args:
         text: Input string to classify.

     Returns:
         A ``(report, scores)`` tuple. ``report`` is a human-readable string.
         ``scores`` maps each label name from ``model.config.id2label`` to its
         softmax probability as a float. ``scores`` is an empty dict when the
         input is blank, when model loading fails, or when any exception is
         raised; in those cases ``report`` carries the warning or error text.
     """
     # Reject None, empty and whitespace-only input before loading anything.
     if not text or not text.strip():
         return "⚠️ Please enter Hebrew text", {}
     try:
         # Try to load model on demand
         success, message, model, tokenizer = test_model_loading()
         if not success:
             return f"Model Loading Failed:\n{message}", {}
         # Perform classification
         # Imported inside the try so an import failure is reported by the
         # except branch below rather than propagating.
         import torch
-        inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128)
         # Inference only: gradient tracking is disabled for the forward pass.
         with torch.no_grad():
             outputs = model(**inputs)
             logits = outputs.logits
             probabilities = torch.softmax(logits, dim=-1)
         # Get results
         # NOTE(review): .item() needs a single-element tensor, so this assumes
         # exactly one input sequence was tokenized — confirm.
         predicted_id = torch.argmax(logits, dim=-1).item()
         predicted_label = model.config.id2label[predicted_id]
         confidence = probabilities[0][predicted_id].item()
         # Create confidence scores for all labels
         all_scores = {}
         for i, prob in enumerate(probabilities[0]):
             intent_name = model.config.id2label[i]
             all_scores[intent_name] = float(prob)
         result = f"""
 🎯 Predicted Intent: {predicted_label}
 🎲 Confidence: {confidence:.1%}
 📊 All Predictions:
 """
         # Sort and display
-        sorted_scores = sorted(all_scores.items(), key=lambda x: x[1], reverse=True)
         for intent, score in sorted_scores:
             # One block character per full 5% of probability, and at least
             # one so that every label gets a visible bar.
             bar = "█" * max(1, int(score * 20))
             result += f"\n{intent}: {score:.1%} {bar}"
         return result, all_scores
     except Exception as e:
         # Any failure is returned as text (and printed) instead of raised.
         error_msg = f"Classification Error: {str(e)}\n\nTraceback:\n{traceback.format_exc()}"
         print(error_msg)
         return error_msg, {}
 def test_connection():
     """Test Hugging Face connection"""
     try:
@@ -92,11 +97,138 @@ def test_connection():
     except Exception as e:
         return f"❌ Repository access failed: {str(e)}"
 # Create interface
 with gr.Blocks(title="Hebrew Intent Classification - Debug") as demo:
     gr.Markdown("# 🇮🇱 Hebrew Intent Classification - Debug Version")
     with gr.Tab("Classification"):
         with gr.Row():
             with gr.Column():
@@ -106,67 +238,82 @@ with gr.Blocks(title="Hebrew Intent Classification - Debug") as demo:
                     lines=3
                 )
                 classify_btn = gr.Button("Classify", variant="primary")
                 # Quick examples
                 gr.Markdown("### Examples:")
                 examples = [
                     "שכחתי את הסיסמה שלי",
-                    "רוצה לבטל את המנוי",
                     "כמה עולה החבילה",
                     "האתר לא עובד"
                 ]
                 for example in examples:
                     gr.Button(example, size="sm").click(
                         lambda x=example: x, outputs=text_input
                     )
             with gr.Column():
                 result_output = gr.Textbox(
                     label="Result:",
                     lines=12,
                     interactive=False
                 )
                 confidence_output = gr.Label(
                     label="Confidence Scores",
                     num_top_classes=4
                 )
     with gr.Tab("Debug"):
         gr.Markdown("### Debug Information")
-        test_btn = gr.Button("Test Model Loading")
-        debug_output = gr.Textbox(
-            label="Debug Output:",
-            lines=15,
-            interactive=False
-        )
-        test_btn.click(
-            lambda: test_model_loading()[1],
-            outputs=debug_output
-        )
-        conn_btn = gr.Button("Test Repository Connection")
-        conn_output = gr.Textbox(
-            label="Connection Test:",
-            lines=5,
-            interactive=False
-        )
-        conn_btn.click(
-            test_connection,
-            outputs=conn_output
-        )
     # Connect classification
     classify_btn.click(
         classify_text,
         inputs=[text_input],
         outputs=[result_output, confidence_output]
     )
     text_input.submit(
         classify_text,
         inputs=[text_input],

 import sys
 import traceback
 def test_model_loading():
     """Attempt to load the intent classifier's tokenizer and model.

     Both objects are fetched with ``from_pretrained`` for the repository id
     ``humy65/hebrew-intent-classifier``; progress is printed along the way.

     Returns:
         A 4-tuple ``(success, message, model, tokenizer)``. When loading
         works, ``success`` is ``True`` and ``message`` is a short
         confirmation. When anything raises, ``success`` is ``False``,
         ``message`` holds the error text followed by the formatted
         traceback, and ``model`` / ``tokenizer`` are both ``None``.
     """
     repo_id = "humy65/hebrew-intent-classifier"
     try:
         print("🔄 Testing model loading...")
         # Imported inside the try so that an import failure is also reported
         # through the except branch instead of propagating to the caller.
         from transformers import AutoTokenizer, AutoModelForSequenceClassification
         print(f"📡 Attempting to load: {repo_id}")
         loaded_tokenizer = AutoTokenizer.from_pretrained(repo_id)
         print("✅ Tokenizer loaded")
         loaded_model = AutoModelForSequenceClassification.from_pretrained(repo_id)
         print("✅ Model loaded")
         print(f"📋 Labels: {loaded_model.config.id2label}")
     except Exception as exc:
         failure_report = (
             f"❌ Error: {exc!s}\n\nTraceback:\n{traceback.format_exc()}"
         )
         print(failure_report)
         return False, failure_report, None, None
     else:
         return True, "Model loaded successfully!", loaded_model, loaded_tokenizer
 def classify_text(text):
     """Classify ``text`` and report the predicted intent with per-label scores.

     The model and tokenizer are obtained by calling ``test_model_loading()``
     on every invocation; nothing is cached inside this function.

     Args:
         text: Input string to classify.

     Returns:
         A ``(report, scores)`` tuple. ``report`` is a human-readable string.
         ``scores`` maps each label name from ``model.config.id2label`` to its
         softmax probability as a float. ``scores`` is an empty dict when the
         input is blank, when model loading fails, or when any exception is
         raised; in those cases ``report`` carries the warning or error text.
     """
     # Reject None, empty and whitespace-only input before loading anything.
     if not text or not text.strip():
         return "⚠️ Please enter Hebrew text", {}
     try:
         # Try to load model on demand
         success, message, model, tokenizer = test_model_loading()
         if not success:
             return f"Model Loading Failed:\n{message}", {}
         # Perform classification
         # Imported inside the try so an import failure is reported by the
         # except branch below rather than propagating.
         import torch
+        inputs = tokenizer(text, return_tensors="pt",
+                           padding=True, truncation=True, max_length=128)
         # Inference only: gradient tracking is disabled for the forward pass.
         with torch.no_grad():
             outputs = model(**inputs)
             logits = outputs.logits
             probabilities = torch.softmax(logits, dim=-1)
         # Get results
         # NOTE(review): .item() needs a single-element tensor, so this assumes
         # exactly one input sequence was tokenized — confirm.
         predicted_id = torch.argmax(logits, dim=-1).item()
         predicted_label = model.config.id2label[predicted_id]
         confidence = probabilities[0][predicted_id].item()
         # Create confidence scores for all labels
         all_scores = {}
         for i, prob in enumerate(probabilities[0]):
             intent_name = model.config.id2label[i]
             all_scores[intent_name] = float(prob)
         result = f"""
 🎯 Predicted Intent: {predicted_label}
 🎲 Confidence: {confidence:.1%}
 📊 All Predictions:
 """
         # Sort and display
         # Highest probability first.
+        sorted_scores = sorted(
+            all_scores.items(), key=lambda x: x[1], reverse=True)
         for intent, score in sorted_scores:
             # One block character per full 5% of probability, and at least
             # one so that every label gets a visible bar.
             bar = "█" * max(1, int(score * 20))
             result += f"\n{intent}: {score:.1%} {bar}"
         return result, all_scores
     except Exception as e:
         # Any failure is returned as text (and printed) instead of raised.
         error_msg = f"Classification Error: {str(e)}\n\nTraceback:\n{traceback.format_exc()}"
         print(error_msg)
         return error_msg, {}
 def test_connection():
     """Test Hugging Face connection"""
     try:
     except Exception as e:
         return f"❌ Repository access failed: {str(e)}"
+def get_training_data():
+    """Build a text summary of the training examples hard-coded below.
+
+    The summary contains the total number of examples, the count and
+    percentage of examples per category (sorted by category name), a fixed
+    list of sample sentences per category, and a closing note about data
+    augmentation.
+
+    Only the totals and per-category statistics are computed from
+    ``training_data``. The sample sentences and the "Original 12 examples"
+    figure are literal text in the template.
+
+    NOTE(review): the list is embedded in this function; whether it matches
+    the data the model was actually trained on cannot be verified from here.
+
+    Returns:
+        The formatted summary as a single string.
+    """
+    # Each entry is an (utterance, category label) pair.
+    training_data = [
+        ("שכחתי את הסיסמה שלי", "שכחת סיסמה"),
+        ("איך אני מבטל את המנוי?", "ביטול מנוי"),
+        ("מה המחיר של התוכנית?", "שאלה כללית"),
+        ("האתר לא עובד לי", "תמיכה טכנית"),
+        ("אני לא מצליח להתחבר", "תמיכה טכנית"),
+        ("איך אני משנה את כתובת האימייל?", "שאלה כללית"),
+        ("אני רוצה לשדרג את התוכנית שלי", "שאלה כללית"),
+        ("החשבון שלי ננעל", "תמיכה טכנית"),
+        ("אני לא מקבל מיילים", "תמיכה טכנית"),
+        ("איך אני רואה את החשבונית שלי?", "שאלה כללית"),
+        ("אני רוצה לבטל את השירות", "ביטול מנוי"),
+        ("שכחתי את פרטי ההתחברות", "שכחת סיסמה"),
+        ("איבדתי את הסיסמה", "שכחת סיסמה"),
+        ("לא זוכר את הסיסמה", "שכחת סיסמה"),
+        ("הסיסמה לא עובדת", "שכחת סיסמה"),
+        ("לא מצליח להיכנס עם הסיסמה", "שכחת סיסמה"),
+        ("צריך לאפס את הסיסמה", "שכחת סיסמה"),
+        ("בעיה עם הסיסמה", "שכחת סיסמה"),
+        ("הסיסמה שלי לא נכונה", "שכחת סיסמה"),
+        ("שכחתי מה הסיסמה", "שכחת סיסמה"),
+        ("איך אני משחזר את הסיסמה", "שכחת סיסמה"),
+        ("רוצה לשנות את הסיסמה", "שכחת סיסמה"),
+        ("הסיסמה לא מתקבלת", "שכחת סיסמה"),
+        ("בעיית התחברות - סיסמה", "שכחת סיסמה"),
+        ("צריך עזרה עם הסיסמה", "שכחת סיסמה"),
+        ("לא יודע מה הסיסמה שלי", "שכחת סיסמה"),
+        ("רוצה לבטל את השירות", "ביטול מנוי"),
+        ("איך מפסיקים את המנוי", "ביטול מנוי"),
+        ("רוצה להפסיק את התשלום", "ביטול מנוי"),
+        ("איך יוצאים מהמנוי", "ביטול מנוי"),
+        ("בקשה לביטול מנוי", "ביטול מנוי"),
+        ("לא רוצה יותר את השירות", "ביטול מנוי"),
+        ("איך מבטלים את החשבון", "ביטול מנוי"),
+        ("רוצה לסגור את החשבון", "ביטול מנוי"),
+        ("עזרה בביטול מנוי", "ביטול מנוי"),
+        ("הליך ביטול המנוי", "ביטול מנוי"),
+        ("מעוניין לבטל", "ביטול מנוי"),
+        ("איך מפסיקים את השירות", "ביטול מנוי"),
+        ("רוצה להפסיק את ההרשמה", "ביטול מנוי"),
+        ("בקשה להפסקת שירות", "ביטול מנוי"),
+        ("מה כולל השירות", "שאלה כללית"),
+        ("אילו תוכניות יש לכם", "שאלה כללית"),
+        ("כמה עולה החבילה", "שאלה כללית"),
+        ("מה ההבדל בין התוכניות", "שאלה כללית"),
+        ("איך אני משנה את הפרטים שלי", "שאלה כללית"),
+        ("איך אפשר לשדרג", "שאלה כללית"),
+        ("מה האפשרויות שלכם", "שאלה כללית"),
+        ("אני רוצה לעדכן פרטים", "שאלה כללית"),
+        ("איך רואים את ההיסטוריה", "שאלה כללית"),
+        ("האפליקציה קורסת", "תמיכה טכנית"),
+        ("יש באג באתר", "תמיכה טכנית"),
+        ("הדף לא נטען", "תמיכה טכנית"),
+        ("שגיאה במערכת", "תמיכה טכנית"),
+        ("הטוען לא עובד", "תמיכה טכנית"),
+        ("בעיה טכנית", "תמיכה טכנית"),
+        ("המערכת לא מגיבה", "תמיכה טכנית"),
+        ("שגיאת חיבור", "תמיכה טכנית"),
+        ("הכפתור לא עובד", "תמיכה טכנית"),
+        ("התמונות לא נטענות", "תמיכה טכנית"),
+        ("הווידאו לא מתנגן", "תמיכה טכנית"),
+        ("איטיות באתר", "תמיכה טכנית")
+    ]
+    # Count examples per category
+    category_counts = {}
+    for _, label in training_data:
+        category_counts[label] = category_counts.get(label, 0) + 1
+    result = f"""
+📊 **Training Data Summary**
+Total Examples: {len(training_data)}
+📈 **Examples per Category:**
+"""
+    # Add category statistics
+    # Categories are listed in sorted order of their label string.
+    for category, count in sorted(category_counts.items()):
+        percentage = (count / len(training_data)) * 100
+        result += f"\n• {category}: {count} examples ({percentage:.1f}%)"
+    # The sample sentences below are fixed template text; only the final
+    # example count is interpolated from training_data.
+    result += f"""
+📝 **Sample Training Examples:**
+🔐 **שכחת סיסמה (Password Reset):**
+• שכחתי את הסיסמה שלי
+• לא זוכר את הסיסמה
+• הסיסמה לא עובדת
+• צריך לאפס את הסיסמה
+• איך אני משחזר את הסיסמה
+❌ **ביטול מנוי (Cancel Subscription):**
+• איך אני מבטל את המנוי?
+• רוצה להפסיק את התשלום
+• לא רוצה יותר את השירות
+• איך מבטלים את החשבון
+• בקשה לביטול מנוי
+❓ **שאלה כללית (General Question):**
+• מה המחיר של התוכנית?
+• כמה עולה החבילה
+• אילו תוכניות יש לכם
+• איך אני משנה את הפרטים שלי
+• מה כולל השירות
+🔧 **תמיכה טכנית (Technical Support):**
+• האתר לא עובד לי
+• האפליקציה קורסת
+• יש באג באתר
+• הדף לא נטען
+• שגיאה במערכת
+---
+💡 **Model was trained with data augmentation techniques:**
+• Synonym replacement
+• Paraphrasing
+• Context variation
+• Original 12 examples → Enhanced to {len(training_data)} examples
+"""
+    return result
 # Create interface
 with gr.Blocks(title="Hebrew Intent Classification - Debug") as demo:
     gr.Markdown("# 🇮🇱 Hebrew Intent Classification - Debug Version")
     with gr.Tab("Classification"):
         with gr.Row():
             with gr.Column():
                     lines=3
                 )
                 classify_btn = gr.Button("Classify", variant="primary")
                 # Quick examples
                 gr.Markdown("### Examples:")
                 examples = [
                     "שכחתי את הסיסמה שלי",
+                    "רוצה לבטל את המנוי",
                     "כמה עולה החבילה",
                     "האתר לא עובד"
                 ]
                 for example in examples:
                     gr.Button(example, size="sm").click(
                         lambda x=example: x, outputs=text_input
                     )
             with gr.Column():
                 result_output = gr.Textbox(
                     label="Result:",
                     lines=12,
                     interactive=False
                 )
                 confidence_output = gr.Label(
                     label="Confidence Scores",
                     num_top_classes=4
                 )
     with gr.Tab("Debug"):
         gr.Markdown("### Debug Information")
+        with gr.Row():
+            with gr.Column():
+                test_btn = gr.Button("Test Model Loading")
+                debug_output = gr.Textbox(
+                    label="Debug Output:",
+                    lines=15,
+                    interactive=False
+                )
+                test_btn.click(
+                    lambda: test_model_loading()[1],
+                    outputs=debug_output
+                )
+                conn_btn = gr.Button("Test Repository Connection")
+                conn_output = gr.Textbox(
+                    label="Connection Test:",
+                    lines=5,
+                    interactive=False
+                )
+                conn_btn.click(
+                    test_connection,
+                    outputs=conn_output
+                )
+            with gr.Column():
+                data_btn = gr.Button("Show Training Data")
+                training_output = gr.Textbox(
+                    label="Training Data:",
+                    lines=20,
+                    interactive=False
+                )
+                data_btn.click(
+                    get_training_data,
+                    outputs=training_output
+                )
     # Connect classification
     classify_btn.click(
         classify_text,
         inputs=[text_input],
         outputs=[result_output, confidence_output]
     )
     text_input.submit(
         classify_text,
         inputs=[text_input],