Spaces:

abbasNoway
/

Urdu_Emoji_predictor

Sleeping

App Files Files Community

abbasNoway committed on Nov 29, 2025

Commit

045109a

verified ·

1 Parent(s): 8efcb49

Update app.py

Browse files

Files changed (1) hide show

app.py +83 -178

app.py CHANGED Viewed

@@ -15,203 +15,136 @@ class UrduOptimizedPredictor:
         self.text_model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-mpnet-base-v2')
         self.text_model.to(self.device)
-        # Try multiple possible model file locations
-        possible_paths = [
-            "urdu_optimized_model.pkl",
-            "./urdu_optimized_model.pkl",
-            "models/urdu_optimized_model/urdu_optimized_model.pkl",
-            "urdu_optimized_model.pkl"
-        ]
-        model_loaded = False
-        for model_file in possible_paths:
-            if os.path.exists(model_file):
-                print(f"📁 Loading model from: {model_file}")
-                try:
-                    with open(model_file, 'rb') as f:
-                        model_data = pickle.load(f)
-                    self.emoji_embeddings = {k: v[0] for k, v in model_data['emoji_embeddings'].items()}
-                    self.emoji_list = model_data['emoji_list']
-                    print(f"✅ Loaded Urdu-optimized model with {len(self.emoji_list)} meaningful emojis")
-                    model_loaded = True
-                    break
-                except Exception as e:
-                    print(f"❌ Error loading {model_file}: {e}")
-                    continue
-        if not model_loaded:
-            print("❌ Could not load model file. Using fallback predictions.")
-            # Create fallback emoji mappings
-            self.emoji_embeddings = {}
-            self.emoji_list = ["😊", "❤️", "😂", "😭", "😍", "🔥", "🙏", "👍"]
-    def predict_smart(self, text, top_k=3, min_confidence=0.3):
-        """Smart prediction with confidence filtering"""
-        # Check if model is loaded properly
-        if not self.emoji_embeddings:
-            return self.fallback_predict(text, top_k)
         try:
-            # Get text embedding
-            text_embedding = self.text_model.encode([text], convert_to_tensor=True)
-            text_embedding_np = text_embedding.cpu().numpy()
-            # Calculate similarities
-            similarities = {}
-            for emoji, emoji_embedding in self.emoji_embeddings.items():
-                similarity = cosine_similarity(text_embedding_np, emoji_embedding.reshape(1, -1))[0][0]
-                similarities[emoji] = similarity
-            # Filter by confidence and return top K
-            filtered = [(emoji, score) for emoji, score in similarities.items() if score >= min_confidence]
-            sorted_emojis = sorted(filtered, key=lambda x: x[1], reverse=True)
-            # If no confident predictions, return top 1 anyway
-            if not sorted_emojis:
-                top_overall = sorted(similarities.items(), key=lambda x: x[1], reverse=True)[:1]
-                return top_overall
-            return sorted_emojis[:top_k]
         except Exception as e:
-            print(f"Prediction error: {e}")
-            return self.fallback_predict(text, top_k)
-    def fallback_predict(self, text, top_k=3):
-        """Fallback prediction using keyword matching"""
-        text_lower = text.lower()
-        # Keyword to emoji mapping
-        keyword_mapping = {
-            'خوش': ['😊', '😄', '😂'],
-            'مسکراہٹ': ['😊', '🙂', '😄'],
-            'ہنس': ['😂', '😄', '🤣'],
-            'محبت': ['❤️', '😍', '💕'],
-            'دل': ['❤️', '💖', '💕'],
-            'غصہ': ['😠', '👿', '💢'],
-            'ناراض': ['😠', '😡', '👿'],
-            'رونا': ['😭', '😢', '��'],
-            'دکھ': ['😔', '😞', '😢'],
-            'تھک': ['😴', '🥱', '😪'],
-            'نیند': ['😴', '💤', '🥱'],
-            'بارش': ['🌧️', '☔', '⛈️'],
-            'موسم': ['🌤️', '☀️', '🌞'],
-            'دوست': ['👫', '👭', '👬'],
-            'پارٹی': ['🎉', '🎊', '🥳'],
-            'کھانا': ['😋', '🍕', '🍔'],
-            'پڑھائی': ['📚', '✏️', '🎓'],
-            'کام': ['💼', '👔', '📊'],
-            'سفر': ['✈️', '🚗', '🏞️'],
-            'کھیل': ['⚽', '🏀', '🎾']
-        }
-        # Find matching keywords
-        matches = []
-        for keyword, emojis in keyword_mapping.items():
-            if keyword in text_lower:
-                matches.extend([(emoji, 0.8) for emoji in emojis])
-        # Remove duplicates and return top_k
-        unique_matches = []
-        seen_emojis = set()
-        for emoji, score in matches:
-            if emoji not in seen_emojis:
-                unique_matches.append((emoji, score))
-                seen_emojis.add(emoji)
-        return unique_matches[:top_k] if unique_matches else [('😐', 0.5)]
 # Initialize predictor
-print("🚀 Loading Urdu Emoji Prediction Model...")
 predictor = UrduOptimizedPredictor()
 def predict_emoji(urdu_text):
-    """Main prediction function for Gradio interface"""
     if not urdu_text.strip():
         return "⬅️ اردو متن لکھیں"
     try:
-        # Get predictions from model
         predictions = predictor.predict_smart(urdu_text, top_k=3, min_confidence=0.3)
         # Format output
         if predictions:
-            result = ""
             for i, (emoji, score) in enumerate(predictions, 1):
-                confidence = "🟢" if score > 0.6 else "🟡" if score > 0.4 else "🔴"
-                result += f"{i}. {emoji} {confidence} ({score:.2f})\n"
-            return result.strip()
         else:
-            return "❌ کوئی مناسب ایموجی نہیں مل سکی"
     except Exception as e:
         print(f"Error in prediction: {e}")
-        return "⚠️ نظام میں خرابی، براہ کرم دوبارہ کوشش کریں"
-def batch_predict(texts):
-    """Batch prediction for multiple texts"""
-    results = []
-    for text in texts:
-        if text.strip():
-            predictions = predictor.predict_smart(text, top_k=2, min_confidence=0.3)
-            emojis = " ".join([emoji for emoji, score in predictions])
-            results.append(f"{text} → {emojis}")
-        else:
-            results.append("⬅️ متن درکار ہے")
-    return "\n".join(results)
 # Create Gradio interface
-with gr.Blocks(theme=gr.themes.Soft(), title="Urdu Emoji Predictor") as demo:
     gr.Markdown(
         """
-        # 🇵🇰 اردو ایموجی پیشنگو
-        ### اردو متن کے لیے موزوں ترین ایموجیز کی پیشنگو
-        اپنا اردو متن لکھیں اور متعلقہ ایموجیز حاصل کریں!
         """
     )
     with gr.Row():
         with gr.Column():
             input_text = gr.Textbox(
-                label="اردو متن",
-                placeholder="یہاں اپنا اردو متن لکھیں... مثال: میں آج بہت خوش ہوں",
                 lines=3
             )
-            predict_btn = gr.Button("🚀 ایموجیز پیش کریں", variant="primary")
         with gr.Column():
             output_text = gr.Textbox(
-                label="پیشنگو ایموجیز",
-                placeholder="یہاں آپ کی پیشنگو ایموجیز ظاہر ہوں گی...",
-                lines=4
             )
-    # Confidence threshold slider
-    confidence_slider = gr.Slider(
-        minimum=0.1,
-        maximum=0.9,
-        value=0.3,
-        step=0.1,
-        label="اعتماد کی سطح",
-        info="اعتماد کی سطح کم رکھیں تو زیادہ ایموجیز مل سکتی ہیں"
-    )
-    # Batch prediction section
-    with gr.Accordion("🎯 ایک سے زیادہ متنوں کے لیے", open=False):
-        batch_input = gr.Textbox(
-            label="متنوں کی فہرست (ہر متن نیلی لائن پر)",
-            placeholder="پہلا متن\nدوسرا متن\nتیسرا متن",
-            lines=4
-        )
-        batch_output = gr.Textbox(label="نتیجہ", lines=4)
-        batch_btn = gr.Button("📊 تمام کے لیے ایموجیز پیش کریں")
-    # Examples section
-    gr.Markdown("### 💡 مثالیں آزما کر دیکھیں")
     examples = gr.Examples(
         examples=[
             ["میں آج بہت خوش ہوں اور مسکرا رہا ہوں"],
@@ -226,40 +159,12 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Urdu Emoji Predictor") as demo:
         inputs=input_text,
         outputs=output_text,
         fn=predict_emoji,
-        cache_examples=True
-    )
-    # Footer
-    gr.Markdown(
-        """
-        ---
-        ### ℹ️ نظام کے بارے میں
-        - **ماڈل**: Urdu-optimized Embedding Model
-        - **ایموجیز**: 80+ Urdu-context optimized emojis
-        - **درستی**: Top-3 درستگی 30.4%
-        - **ڈیٹا**: 10 لاکھ+ Urdu tweets پر تربیت
-        🟢 اعلی درستگی | 🟡 درمیانی درستگی | 🔴 کم درستگی
-        """
-    )
-    # Event handlers
-    predict_btn.click(
-        fn=predict_emoji,
-        inputs=input_text,
-        outputs=output_text
-    )
-    batch_btn.click(
-        fn=lambda x: batch_predict(x.split('\n')),
-        inputs=batch_input,
-        outputs=batch_output
     )
 if __name__ == "__main__":
     demo.launch(
         server_name="0.0.0.0",
         server_port=7860,
-        share=True,  # Creates public link
-        show_error=True
     )

         self.text_model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-mpnet-base-v2')
         self.text_model.to(self.device)
+        # Load YOUR model
+        model_file = "models/urdu_optimized_model/urdu_optimized_model.pkl"
+        print(f"📁 Loading YOUR model from: {model_file}")
         try:
+            with open(model_file, 'rb') as f:
+                model_data = pickle.load(f)
+            self.emoji_embeddings = {k: v[0] for k, v in model_data['emoji_embeddings'].items()}
+            self.emoji_list = model_data['emoji_list']
+            print(f"✅ SUCCESS: Loaded YOUR Urdu-optimized model with {len(self.emoji_list)} emojis")
+            print(f"📊 Your emojis: {self.emoji_list[:20]}...")  # Show first 20 emojis
         except Exception as e:
+            print(f"❌ ERROR loading your model: {e}")
+            raise e
+    def predict_smart(self, text, top_k=3, min_confidence=0.3):
+        """Use YOUR model for prediction"""
+        print(f"\n🔍 PREDICTING for: '{text}'")
+        # Get text embedding
+        text_embedding = self.text_model.encode([text], convert_to_tensor=True)
+        text_embedding_np = text_embedding.cpu().numpy()
+        # Calculate similarities with YOUR emoji embeddings
+        similarities = {}
+        for emoji, emoji_embedding in self.emoji_embeddings.items():
+            similarity = cosine_similarity(text_embedding_np, emoji_embedding.reshape(1, -1))[0][0]
+            similarities[emoji] = similarity
+        print(f"📈 Similarities calculated for {len(similarities)} emojis")
+        # Filter by confidence and return top K
+        filtered = [(emoji, score) for emoji, score in similarities.items() if score >= min_confidence]
+        sorted_emojis = sorted(filtered, key=lambda x: x[1], reverse=True)
+        print(f"🎯 Top predictions: {sorted_emojis[:top_k]}")
+        # If no confident predictions, return top overall
+        if not sorted_emojis:
+            top_overall = sorted(similarities.items(), key=lambda x: x[1], reverse=True)[:top_k]
+            print(f"⚠️ No confident predictions, using top overall: {top_overall}")
+            return top_overall
+        return sorted_emojis[:top_k]
 # Initialize predictor
+print("🚀 Loading YOUR Urdu Emoji Prediction Model...")
 predictor = UrduOptimizedPredictor()
 def predict_emoji(urdu_text):
+    """Main prediction function using YOUR model"""
     if not urdu_text.strip():
         return "⬅️ اردو متن لکھیں"
     try:
+        # Get predictions from YOUR model
         predictions = predictor.predict_smart(urdu_text, top_k=3, min_confidence=0.3)
         # Format output
         if predictions:
+            result = "🎯 **آپ کے ماڈل کی پیشنگو:**\n\n"
             for i, (emoji, score) in enumerate(predictions, 1):
+                confidence_level = "اعلیٰ" if score > 0.6 else "درمیانی" if score > 0.4 else "کم"
+                result += f"{i}. {emoji} - {confidence_level} درستگی ({score:.3f})\n"
+            return result
         else:
+            return "❌ آپ کے ماڈل سے کوئی مناسب ایموجی نہیں مل سکی"
     except Exception as e:
         print(f"Error in prediction: {e}")
+        return f"⚠️ نظام میں خرابی: {e}"
+# Test your model with some examples before starting the interface
+print("\n" + "="*60)
+print("🧪 TESTING YOUR MODEL WITH SAMPLE TEXTS")
+print("="*60)
+test_texts = [
+    "میں بہت خوش ہوں",
+    "دل ٹوٹ گیا ہے",
+    "دوستوں کے ساتھ پارٹی کا مزہ آیا",
+    "امی نے میری پسندیدہ ڈش بنائی ہے",
+    "غصہ سے دماغ پھٹ رہا ہے"
+]
+for text in test_texts:
+    print(f"\n📝 Testing: '{text}'")
+    predictions = predictor.predict_smart(text, top_k=3, min_confidence=0.3)
+    print(f"   → {[emoji for emoji, score in predictions]}")
+print("\n" + "="*60)
+print("🚀 STARTING GRADIO INTERFACE")
+print("="*60)
 # Create Gradio interface
+demo = gr.Blocks(title="آپ کا اردو ایموجی پیشنگو ماڈل")
+with demo:
     gr.Markdown(
         """
+        # 🎯 **آپ کا تربیت یافتہ اردو ایموجی ماڈل**
+        یہ انٹرفیس **آپ کے ہی تربیت یافتہ ماڈل** کا استعمال کر رہا ہے!
+        - **80+ Urdu emojis** آپ کے ڈیٹا سے تربیت یافتہ
+        - **10 لاکھ+ Urdu tweets** پر مبنی
+        - **Top-3 درستگی: 30.4%**
         """
     )
     with gr.Row():
         with gr.Column():
             input_text = gr.Textbox(
+                label="اردو متن درج کریں",
+                placeholder="اپنا Urdu متن یہاں لکھیں...",
                 lines=3
             )
+            predict_btn = gr.Button("🎯 ماڈل سے ایموجیز حاصل کریں", variant="primary")
         with gr.Column():
             output_text = gr.Textbox(
+                label="آپ کے ماڈل کی پیشنگو",
+                placeholder="یہاں آپ کے ماڈل کی پیشنگو ایموجیز ظاہر ہوں گی...",
+                lines=5
             )
+    gr.Markdown("### 💡 آپ کے ماڈل کی جانچ کے لیے مثالیں")
     examples = gr.Examples(
         examples=[
             ["میں آج بہت خوش ہوں اور مسکرا رہا ہوں"],
         inputs=input_text,
         outputs=output_text,
         fn=predict_emoji,
+        cache_examples=False
     )
 if __name__ == "__main__":
     demo.launch(
         server_name="0.0.0.0",
         server_port=7860,
+        share=True
     )