Shim committed on
Commit 77244ea · 1 Parent(s): 7aff121

Replace static responses with FLAN-T5 Hebrew-capable AI model for real conversations

Files changed (1)
  1. app.py +102 -53
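
The change swaps the static/demo responses for a real model call: on Spaces the app loads google/flan-t5-small and serves it through a transformers text2text-generation pipeline. As a minimal, illustrative sketch of that approach (the prompt text and generation settings below are placeholders, not the app's exact code):

# Sketch only: a small seq2seq model served via the text2text-generation task,
# mirroring the approach taken in the diff below; prompt and settings are illustrative.
from transformers import pipeline

generator = pipeline(
    "text2text-generation",        # task used for T5-family (seq2seq) models
    model="google/flan-t5-small",  # the lightweight checkpoint this commit targets
)

outputs = generator("Answer briefly: what is Gradio?", max_new_tokens=50)
print(outputs[0]["generated_text"])  # the pipeline returns only the generated reply
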
app.py CHANGED
@@ -6,7 +6,7 @@ Main application file with Gradio interface
 
 import gradio as gr
 import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM, pipeline
 import logging
 import sys
 from typing import List, Tuple, Optional
@@ -42,52 +42,82 @@ class MirautrApp:
         is_hf_spaces = os.getenv("SPACE_ID") is not None
 
         if is_hf_spaces:
-            logger.info("Running in Hugging Face Spaces - using lightweight model")
-            # Use a smaller, more suitable model for HF Spaces
-            model_name = "microsoft/DialoGPT-medium" # Fallback to English model that works
-            logger.info("Using English model due to HF Spaces limitations")
+            logger.info("Running in Hugging Face Spaces - using lightweight Hebrew-capable model")
+            # Use a small multilingual model that supports Hebrew and fits in HF Spaces
+            model_name = "google/flan-t5-small" # 77M parameters, supports Hebrew
+            logger.info(f"Loading lightweight model: {model_name}")
 
         else:
-            model_name = "yam-peleg/Hebrew-Mistral-7B"
-            logger.info(f"Loading Hebrew model: {model_name}")
-
-            # For HF Spaces, skip heavy model loading and use API-based approach or lighter model
-            if is_hf_spaces:
-                logger.info("Skipping heavy model loading for HF Spaces - using demo mode")
-                self.setup_fallback_model()
-                return
+            # For local development, try Hebrew-specific model first
+            try:
+                model_name = "yam-peleg/Hebrew-Mistral-7B"
+                logger.info(f"Loading Hebrew model: {model_name}")
+            except:
+                # Fallback to small model for local testing too
+                model_name = "google/flan-t5-small"
+                logger.info(f"Falling back to small model: {model_name}")
 
         # Load tokenizer
         self.tokenizer = AutoTokenizer.from_pretrained(model_name)
 
         # Determine the best settings for the environment
-        if torch.cuda.is_available():
+        if torch.cuda.is_available() and not is_hf_spaces:
             torch_dtype = torch.float16
             device_map = "auto"
         else:
+            # Use CPU-friendly settings for HF Spaces
             torch_dtype = torch.float32
             device_map = None
 
         # Load model with appropriate settings
-        self.model = AutoModelForCausalLM.from_pretrained(
-            model_name,
-            torch_dtype=torch_dtype,
-            device_map=device_map,
-            low_cpu_mem_usage=True,
-            trust_remote_code=True # For HF Spaces compatibility
-        )
+        if "t5" in model_name.lower():
+            # Use Seq2Seq model for T5
+            self.model = AutoModelForSeq2SeqLM.from_pretrained(
+                model_name,
+                torch_dtype=torch_dtype,
+                low_cpu_mem_usage=True
+            )
+        elif "mistral" in model_name.lower():
+            # Use CausalLM for Mistral with additional settings
+            self.model = AutoModelForCausalLM.from_pretrained(
+                model_name,
+                torch_dtype=torch_dtype,
+                device_map=device_map,
+                low_cpu_mem_usage=True,
+                trust_remote_code=True
+            )
+        else:
+            # Default to CausalLM for other models
+            self.model = AutoModelForCausalLM.from_pretrained(
+                model_name,
+                torch_dtype=torch_dtype,
+                low_cpu_mem_usage=True
+            )
 
-        # Create text generation pipeline
-        self.generator = pipeline(
-            "text-generation",
-            model=self.model,
-            tokenizer=self.tokenizer,
-            max_new_tokens=150,
-            temperature=0.7,
-            do_sample=True,
-            pad_token_id=self.tokenizer.eos_token_id,
-            return_full_text=False # Only return generated text
-        )
+        # Create text generation pipeline with appropriate settings
+        generation_kwargs = {
+            "max_new_tokens": 100,
+            "temperature": 0.8,
+            "do_sample": True,
+            "pad_token_id": self.tokenizer.eos_token_id,
+            "return_full_text": False
+        }
+
+        # For T5 models, use text2text-generation
+        if "t5" in model_name.lower():
+            self.generator = pipeline(
+                "text2text-generation",
+                model=self.model,
+                tokenizer=self.tokenizer,
+                **generation_kwargs
+            )
+        else:
+            self.generator = pipeline(
+                "text-generation",
+                model=self.model,
+                tokenizer=self.tokenizer,
+                **generation_kwargs
+            )
 
         logger.info("Model loaded successfully")
 
@@ -129,24 +159,43 @@ class MirautrApp:
         # Prepare conversation context
         context = self.conversation_manager.get_conversation_context(conversation_state)
 
-        # Create the full prompt
-        full_prompt = f"{system_prompt}\n\nהקשר: {context}\n\nהמשתמש אמר: {user_message}\n\nתגובה:"
-
+        # Try to generate with model first
+        response = None
         if self.generator:
-            # Generate with the model
-            outputs = self.generator(
-                full_prompt,
-                max_new_tokens=150,
-                temperature=0.7,
-                do_sample=True,
-                num_return_sequences=1
-            )
-
-            response = outputs[0]["generated_text"]
-            # Extract only the new generated part
-            response = response[len(full_prompt):].strip()
-
-        else:
+            try:
+                # Check if using T5 model (text2text-generation)
+                if hasattr(self.generator, 'task') and self.generator.task == 'text2text-generation':
+                    # For T5 models, create a more structured prompt
+                    part_description = DEFAULT_PARTS.get(conversation_state.selected_part, {}).get("description", conversation_state.selected_part)
+                    persona_name = conversation_state.persona_name or DEFAULT_PARTS.get(conversation_state.selected_part, {}).get("default_persona_name", "חלק פנימי")
+
+                    prompt = f"אתה {persona_name}, {part_description}. ענה בעברית על ההודעה הבאה בהתאם לאופי שלך: {user_message}"
+
+                    outputs = self.generator(prompt, max_length=150, num_return_sequences=1)
+                    response = outputs[0]["generated_text"].strip()
+
+                    # Clean up the response if it repeats the prompt
+                    if prompt in response:
+                        response = response.replace(prompt, "").strip()
+
+                else:
+                    # For causal LM models
+                    full_prompt = f"{system_prompt}\n\nהקשר: {context}\n\nהמשתמש אמר: {user_message}\n\nתגובה:"
+                    outputs = self.generator(full_prompt)
+                    response = outputs[0]["generated_text"]
+                    # Extract only the new generated part
+                    response = response[len(full_prompt):].strip()
+
+                # Basic validation and cleanup
+                if not response or len(response.strip()) < 5:
+                    response = None
+
+            except Exception as gen_error:
+                logger.warning(f"Model generation failed: {gen_error}, falling back to contextual response")
+                response = None
+
+        # If model generation failed or no model available, use fallback
+        if not response:
             # Fallback response for demo mode
             part_info = DEFAULT_PARTS.get(conversation_state.selected_part, {})
             persona_name = conversation_state.persona_name or part_info.get("default_persona_name", "חלק פנימי")
@@ -225,10 +274,10 @@ class MirautrApp:
         # Header
         is_hf_spaces = os.getenv("SPACE_ID") is not None
         demo_notice = """
-        <div style="background-color: #fff3cd; border: 1px solid #ffeaa7; padding: 10px; margin: 10px 0; border-radius: 5px; text-align: center;">
-        <strong>🧪 מצב הדגמה</strong><br/>
-        זהו מצב הדגמה - התגובות מבוססות על דוגמאות מוכנות מראש.<br/>
-        הגרסה המלאה תכלול מודל בינה מלאכותית מתקדם לתגובות אישיות יותר.
+        <div style="background-color: #d4edda; border: 1px solid #c3e6cb; padding: 10px; margin: 10px 0; border-radius: 5px; text-align: center;">
+        <strong>🤖 גרסה קלה</strong><br/>
+        משתמש במודל בינה מלאכותית קל התומך בעברית (FLAN-T5) המותאם לסביבת Hugging Face Spaces.<br/>
+        הגרסה המקומית משתמשת במודל עברי מתקדם יותר.
         </div>
         """ if is_hf_spaces else ""
 
283