Spaces:

harismlnaslm
/

Textilindo-AI

Sleeping

App Files Community

harismlnaslm committed on Oct 30, 2025

Commit

3a93207

1 Parent(s): e3e9bcd

Serverless-safe defaults: fallback from LLaMA/TinyLlama to DialoGPT; 404 auto-fallback to distilgpt2; robust prompt/cleanup for both families

Browse files

Files changed (1) hide show

app_backup.py +57 -292

app_backup.py CHANGED Viewed

@@ -150,198 +150,21 @@ class TrainingManager:
     """Manage AI model training using the training scripts"""
     def __init__(self):
-        self.training_status = {
-            "is_training": False,
-            "progress": 0,
-            "status": "idle",
-            "start_time": None,
-            "end_time": None,
-            "error": None,
-            "logs": []
-        }
-        self.training_thread = None
-    def start_training(self, model_name: str = "meta-llama/Llama-3.1-8B-Instruct", epochs: int = 3, batch_size: int = 4):
-        """Start training in background thread"""
-        if self.training_status["is_training"]:
-            return {"error": "Training already in progress"}
-        self.training_status = {
-            "is_training": True,
-            "progress": 0,
-            "status": "starting",
-            "start_time": datetime.now().isoformat(),
-            "end_time": None,
-            "error": None,
-            "logs": []
-        }
-        # Start training in background thread
-        self.training_thread = threading.Thread(
-            target=self._run_training,
-            args=(model_name, epochs, batch_size),
-            daemon=True
-        )
-        self.training_thread.start()
-        return {"message": "Training started", "status": "starting"}
-    def _run_training(self, model_name: str, epochs: int, batch_size: int):
-        """Run the actual training process"""
-        try:
-            self.training_status["status"] = "preparing"
-            self.training_status["logs"].append("Preparing training environment...")
-            # Check if training data exists
-            data_path = "data/textilindo_training_data.jsonl"
-            if not os.path.exists(data_path):
-                raise Exception("Training data not found")
-            self.training_status["status"] = "training"
-            self.training_status["logs"].append("Starting model training...")
-            # Create a simple training script for HF Spaces
-            training_script = f"""
-import os
-import sys
-import json
-import logging
-from pathlib import Path
-from datetime import datetime
-# Add current directory to path
-sys.path.append('.')
-# Setup logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-def simple_training():
-    \"\"\"Simple training simulation for HF Spaces with Llama support\"\"\"
-    logger.info("Starting training process...")
-    logger.info(f"Model: {model_name}")
-    logger.info(f"Epochs: {epochs}")
-    logger.info(f"Batch Size: {batch_size}")
-    # Load training data
-    data_path = "data/textilindo_training_data.jsonl"
-    with open(data_path, 'r', encoding='utf-8') as f:
-        data = [json.loads(line) for line in f if line.strip()]
-    logger.info(f"Loaded {{len(data)}} training samples")
-    # Model-specific training simulation
-    if "llama" in model_name.lower():
-        logger.info("Using Llama model - High quality training simulation")
-        training_steps = len(data) * {epochs} * 2  # More steps for Llama
-    else:
-        logger.info("Using standard model - Basic training simulation")
-        training_steps = len(data) * {epochs}
-    # Simulate training progress
-    for epoch in range({epochs}):
-        logger.info(f"Epoch {{epoch + 1}}/{epochs}")
-        for i, sample in enumerate(data):
-            # Simulate training step
-            progress = ((epoch * len(data) + i) / ({epochs} * len(data))) * 100
-            logger.info(f"Training progress: {{progress:.1f}}% - Processing: {{sample.get('instruction', 'Unknown')[:50]}}...")
-            # Update training status
-            with open("training_status.json", "w") as f:
-                json.dump({{
-                    "is_training": True,
-                    "progress": progress,
-                    "status": "training",
-                    "model": "{model_name}",
-                    "epoch": epoch + 1,
-                    "step": i + 1,
-                    "total_steps": len(data),
-                    "current_sample": sample.get('instruction', 'Unknown')[:50]
-                }}, f)
-    logger.info("Training completed successfully!")
-    logger.info(f"Model {model_name} has been fine-tuned with Textilindo data")
-    # Save final status
-    with open("training_status.json", "w") as f:
-        json.dump({{
-            "is_training": False,
-            "progress": 100,
-            "status": "completed",
-            "model": "{model_name}",
-            "end_time": datetime.now().isoformat(),
-            "message": f"Model {model_name} training completed successfully!"
-        }}, f)
-if __name__ == "__main__":
-    simple_training()
-"""
-            # Write training script
-            with open("run_training.py", "w") as f:
-                f.write(training_script)
-            # Run training
-            result = subprocess.run(
-                ["python", "run_training.py"],
-                capture_output=True,
-                text=True,
-                cwd="."
             )
-            if result.returncode == 0:
-                self.training_status["status"] = "completed"
-                self.training_status["progress"] = 100
-                self.training_status["logs"].append("Training completed successfully!")
-            else:
-                raise Exception(f"Training failed: {result.stderr}")
-        except Exception as e:
-            logger.error(f"Training error: {e}")
-            self.training_status["status"] = "error"
-            self.training_status["error"] = str(e)
-            self.training_status["logs"].append(f"Error: {e}")
-        finally:
-            self.training_status["is_training"] = False
-            self.training_status["end_time"] = datetime.now().isoformat()
-    def get_training_status(self):
-        """Get current training status"""
-        # Try to read from file if available
-        status_file = "training_status.json"
-        if os.path.exists(status_file):
-            try:
-                with open(status_file, "r") as f:
-                    file_status = json.load(f)
-                    self.training_status.update(file_status)
-            except:
-                pass
-        return self.training_status
-    def stop_training(self):
-        """Stop training if running"""
-        if self.training_status["is_training"]:
-            self.training_status["status"] = "stopped"
-            self.training_status["is_training"] = False
-            return {"message": "Training stopped"}
-        return {"message": "No training in progress"}
-class TextilindoAI:
-    """Textilindo AI Assistant using HuggingFace Inference API with Auto-Training"""
-    def __init__(self):
-        self.api_key = os.getenv('HUGGINGFAC_API_KEY_2')
-        # Use available model with your API key
-        self.model = os.getenv('DEFAULT_MODEL', 'meta-llama/Llama-3.2-1B-Instruct')
         self.system_prompt = self.load_system_prompt()
-        self.data_loader = TrainingDataLoader()
-        # Auto-training configuration
-        self.auto_training_enabled = True
-        self.training_interval = 300  # Train every 5 minutes
-        self.last_training_time = 0
-        self.trained_responses = {}  # Cache for trained responses
         if not self.api_key:
             logger.warning("HUGGINGFAC_API_KEY_2 not found. Using mock responses.")
@@ -491,120 +314,62 @@ Minimum purchase is 1 roll (67-70 yards)."""
             return self.get_fallback_response(user_message)
         try:
-            # Use appropriate conversation format
-            if "llama" in self.model.lower():
-                # Use proper chat format for Llama models
-                prompt = f"<|system|>\n{self.system_prompt}\n<|user|>\n{user_message}\n<|assistant|>\n"
-            elif "dialogpt" in self.model.lower():
-                prompt = f"User: {user_message}\nAssistant:"
-            elif "gpt2" in self.model.lower():
-                prompt = f"User: {user_message}\nAssistant:"
             else:
-                # Fallback format for other models
-                prompt = f"User: {user_message}\nAssistant:"
-            logger.info(f"Using model: {self.model}")
-            logger.info(f"API Key present: {bool(self.api_key)}")
-            logger.info(f"Generating response for prompt: {prompt[:100]}...")
-            # Generate response with DialoGPT-optimized parameters
-            if "dialogpt" in self.model.lower():
-                response = self.client.text_generation(
-                    prompt,
-                    max_new_tokens=150,
-                    temperature=0.8,
-                    top_p=0.9,
-                    top_k=50,
-                    repetition_penalty=1.1,
-                    do_sample=True,
-                    stop_sequences=["User:", "Assistant:", "\n\n"]
-                )
-            else:
-                # GPT-2 parameters for other models
-                response = self.client.text_generation(
-                    prompt,
-                    max_new_tokens=150,
-                    temperature=0.8,
-                    top_p=0.9,
-                    top_k=50,
-                    repetition_penalty=1.2,
-                    do_sample=True,
-                    stop_sequences=["User:", "Assistant:", "\n\n"]
-                )
-            logger.info(f"Raw AI response: {response[:200]}...")
-            # Clean up the response based on model type
-            if "llama" in self.model.lower():
-                # Clean up Llama response
                 if "<|assistant|>" in response:
                     assistant_response = response.split("<|assistant|>")[-1].strip()
                 else:
                     assistant_response = response.strip()
-                # Remove any remaining conversation markers
-                assistant_response = assistant_response.replace("<|end|>", "").strip()
-            elif "dialogpt" in self.model.lower() or "gpt2" in self.model.lower():
-                # Clean up DialoGPT/GPT-2 response
-                if "Assistant:" in response:
-                    assistant_response = response.split("Assistant:")[-1].strip()
-                else:
-                    assistant_response = response.strip()
-                # Remove any remaining conversation markers
-                assistant_response = assistant_response.replace("User:", "").replace("Assistant:", "").strip()
             else:
-                # Clean up other model responses
                 if "Assistant:" in response:
-                    assistant_response = response.split("Assistant:")[-1].strip()
-                else:
-                    assistant_response = response.strip()
-                # Remove any remaining conversation markers
-                assistant_response = assistant_response.replace("User:", "").replace("Assistant:", "").strip()
-            # Remove any incomplete sentences or cut-off text
-            if assistant_response.endswith(('.', '!', '?')):
-                pass  # Complete sentence
-            elif '.' in assistant_response:
-                # Take only the first complete sentence
-                assistant_response = assistant_response.split('.')[0] + '.'
-            else:
-                # If no complete sentence, take first 100 characters
-                assistant_response = assistant_response[:100]
-            logger.info(f"Cleaned AI response: {assistant_response[:100]}...")
-            # If response is too short or generic, use fallback
-            if len(assistant_response) < 10 or "I don't know" in assistant_response.lower():
-                logger.warning("AI response too short, using fallback response")
-                return self.get_fallback_response(user_message)
-            return assistant_response
         except Exception as e:
             logger.error(f"Error generating response: {e}")
-            logger.error(f"Error type: {type(e).__name__}")
-            logger.error(f"Error details: {str(e)}")
-            # Try training data as fallback
-            training_match = self.data_loader.find_best_match(user_message)
-            if training_match:
-                logger.info("Using training data as fallback after API error")
-                return training_match.get('output', '')
-            return self.get_fallback_response(user_message)
-    def get_fallback_response(self, user_message: str) -> str:
-        """Fallback response when no training data match and no API available"""
-        # Try to give a more contextual response based on the question
-        if "hello" in user_message.lower() or "hi" in user_message.lower():
-            return "Halo! Saya adalah asisten AI Textilindo. Bagaimana saya bisa membantu Anda hari ini? 😊"
-        elif "weather" in user_message.lower() or "cuaca" in user_message.lower():
-            return "Maaf, saya tidak bisa memberikan informasi cuaca. Tapi saya bisa membantu Anda dengan pertanyaan tentang produk dan layanan Textilindo!"
-        elif "how are you" in user_message.lower() or "apa kabar" in user_message.lower():
-            return "Saya baik-baik saja, terima kasih! Saya siap membantu Anda dengan pertanyaan tentang Textilindo. Ada yang bisa saya bantu?"
-        else:
-            return f"Halo! Saya adalah asisten AI Textilindo. Saya bisa membantu Anda dengan pertanyaan tentang produk dan layanan kami, atau sekadar mengobrol! Bagaimana saya bisa membantu Anda hari ini? 😊"
     def get_mock_response(self, user_message: str) -> str:
         """Enhanced mock responses with better context awareness"""

     """Manage AI model training using the training scripts"""
     def __init__(self):
+        self.api_key = os.getenv('HUGGINGFACE_API_KEY') or os.getenv('HF_TOKEN')
+        # Resolve model with safe defaults for HF Serverless Inference
+        requested_model = (os.getenv('DEFAULT_MODEL') or '').strip()
+        unsupported = ['meta-llama/', 'llama-', 'llama ', 'llama-', 'tinyllama', 'gemma']
+        if not requested_model or any(x in requested_model.lower() for x in unsupported):
+            # Fallback to widely available serverless models
+            self.model = 'microsoft/DialoGPT-medium'
+            logger.warning(
+                f"DEFAULT_MODEL '{requested_model or 'unset'}' not available on Serverless Inference. "
+                f"Falling back to {self.model}."
             )
+        else:
+            self.model = requested_model
         self.system_prompt = self.load_system_prompt()
+        self._fallback_model = 'distilgpt2'
         if not self.api_key:
             logger.warning("HUGGINGFAC_API_KEY_2 not found. Using mock responses.")
             return self.get_fallback_response(user_message)
         try:
+            # Create full prompt with system prompt
+            if any(x in self.model.lower() for x in ['llama', 'tinyllama', 'gemma']):
+                full_prompt = f"<|system|>\n{self.system_prompt}\n<|user|>\n{user_message}\n<|assistant|>\n"
             else:
+                full_prompt = f"User: {user_message}\nAssistant:"
+            # Generate response
+            response = self.client.text_generation(
+                full_prompt,
+                max_new_tokens=512,
+                temperature=0.7,
+                top_p=0.9,
+                top_k=40,
+                repetition_penalty=1.1,
+                stop_sequences=["<|end|>", "<|user|>", "User:", "Assistant:"]
+            )
+            # Extract only the assistant's response
+            if any(x in self.model.lower() for x in ['llama', 'tinyllama', 'gemma']):
                 if "<|assistant|>" in response:
                     assistant_response = response.split("<|assistant|>")[-1].strip()
                 else:
                     assistant_response = response.strip()
+                return assistant_response.replace("<|end|>", "").strip()
             else:
                 if "Assistant:" in response:
+                    return response.split("Assistant:")[-1].strip()
+                return response.strip()
         except Exception as e:
             logger.error(f"Error generating response: {e}")
+            # One-time fallback if current model is not available (404 Not Found)
+            err_text = str(e).lower()
+            if ("404" in err_text or "not found" in err_text) and self.model != self._fallback_model:
+                try:
+                    logger.warning(
+                        f"Model {self.model} unavailable on serverless. Falling back to {self._fallback_model} and retrying."
+                    )
+                    self.model = self._fallback_model
+                    self.client = InferenceClient(token=self.api_key, model=self.model)
+                    retry_prompt = f"User: {user_message}\nAssistant:"
+                    response = self.client.text_generation(
+                        retry_prompt,
+                        max_new_tokens=200,
+                        temperature=0.7,
+                        top_p=0.9,
+                        top_k=40,
+                        repetition_penalty=1.1,
+                        stop_sequences=["User:", "Assistant:"]
+                    )
+                    if "Assistant:" in response:
+                        return response.split("Assistant:")[-1].strip()
+                    return response.strip()
+                except Exception as e2:
+                    logger.error(f"Fallback retry failed: {e2}")
+            return self.get_mock_response(user_message)
     def get_mock_response(self, user_message: str) -> str:
         """Enhanced mock responses with better context awareness"""