Spaces:

harismlnaslm
/

Textilindo-2

Sleeping

File size: 17,117 Bytes

#!/usr/bin/env python3
"""
Textilindo AI API Server - Llama-based
Uses local Llama model with LoRA weights
"""

from flask import Flask, request, jsonify
from flask_cors import CORS
import os
import json
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
from difflib import SequenceMatcher
import logging

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = Flask(__name__)
CORS(app)  # Enable CORS for all routes

def load_system_prompt(default_text):
    try:
        base_dir = os.path.dirname(__file__)
        md_path = os.path.join(base_dir, 'configs', 'system_prompt.md')
        if not os.path.exists(md_path):
            return default_text
        with open(md_path, 'r', encoding='utf-8') as f:
            content = f.read()
        start = content.find('"""')
        end = content.rfind('"""')
        if start != -1 and end != -1 and end > start:
            return content[start+3:end].strip()
        lines = []
        for line in content.splitlines():
            if line.strip().startswith('#'):
                continue
            lines.append(line)
        cleaned = '\n'.join(lines).strip()
        return cleaned or default_text
    except Exception:
        return default_text

class TextilindoAI:
    def __init__(self):
        self.model = None
        self.tokenizer = None
        self.system_prompt = os.getenv(
            'SYSTEM_PROMPT',
            load_system_prompt("You are Textilindo AI Assistant. Be concise, helpful, and use Indonesian.")
        )
        self.dataset = self.load_all_datasets()
        self.model_path = os.getenv('MODEL_PATH', './models/llama-3.2-1b-instruct')
        self.lora_path = os.getenv('LORA_PATH', './models/textilindo-ai-lora')
        
    def load_all_datasets(self):
        """Load all available datasets"""
        dataset = []
        
        # Try multiple possible data directory paths
        possible_data_dirs = [
            "data",
            "./data", 
            "/app/data",
            os.path.join(os.path.dirname(__file__), "data")
        ]
        
        data_dir = None
        for dir_path in possible_data_dirs:
            if os.path.exists(dir_path):
                data_dir = dir_path
                logger.info(f"Found data directory: {data_dir}")
                break
        
        if not data_dir:
            logger.warning("No data directory found in any of the expected locations")
            return dataset
        
        # Load all JSONL files
        try:
            for filename in os.listdir(data_dir):
                if filename.endswith('.jsonl'):
                    filepath = os.path.join(data_dir, filename)
                    try:
                        with open(filepath, 'r', encoding='utf-8') as f:
                            for line_num, line in enumerate(f, 1):
                                line = line.strip()
                                if line:
                                    try:
                                        data = json.loads(line)
                                        dataset.append(data)
                                    except json.JSONDecodeError as e:
                                        logger.warning(f"Invalid JSON in {filename} line {line_num}: {e}")
                                        continue
                        logger.info(f"Loaded {filename}: {len([d for d in dataset if d.get('instruction')])} examples")
                    except Exception as e:
                        logger.error(f"Error loading {filename}: {e}")
        except Exception as e:
            logger.error(f"Error reading data directory {data_dir}: {e}")
        
        logger.info(f"Total examples loaded: {len(dataset)}")
        return dataset
    
    def load_model(self):
        """Load Llama model with LoRA weights"""
        if self.model is not None:
            return  # Already loaded
        
        try:
            logger.info(f"Loading base model from: {self.model_path}")
            
            # Load tokenizer
            self.tokenizer = AutoTokenizer.from_pretrained(
                self.model_path,
                trust_remote_code=True
            )
            
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token
            
            # Load base model
            self.model = AutoModelForCausalLM.from_pretrained(
                self.model_path,
                torch_dtype=torch.float16,
                device_map="auto",
                trust_remote_code=True
            )
            
            # Load LoRA weights if available
            if os.path.exists(self.lora_path):
                logger.info(f"Loading LoRA weights from: {self.lora_path}")
                self.model = PeftModel.from_pretrained(self.model, self.lora_path)
            else:
                logger.warning("No LoRA weights found, using base model")
            
            logger.info("Model loaded successfully")
            
        except Exception as e:
            logger.error(f"Error loading model: {e}")
            raise e
    
    def find_relevant_context(self, user_query, top_k=3):
        """Find most relevant examples from dataset"""
        if not self.dataset:
            return []
        
        scores = []
        for i, example in enumerate(self.dataset):
            instruction = example.get('instruction', '').lower()
            output = example.get('output', '').lower()
            query = user_query.lower()
            
            instruction_score = SequenceMatcher(None, query, instruction).ratio()
            output_score = SequenceMatcher(None, query, output).ratio()
            combined_score = (instruction_score * 0.7) + (output_score * 0.3)
            scores.append((combined_score, i))
        
        scores.sort(reverse=True)
        relevant_examples = []
        
        for score, idx in scores[:top_k]:
            if score > 0.1:
                relevant_examples.append(self.dataset[idx])
        
        return relevant_examples
    
    def create_context_prompt(self, user_query, relevant_examples):
        """Create a prompt with relevant context"""
        if not relevant_examples:
            return user_query
        
        context_parts = []
        context_parts.append("Berikut adalah beberapa contoh pertanyaan dan jawaban tentang Textilindo:")
        context_parts.append("")
        
        for i, example in enumerate(relevant_examples, 1):
            instruction = example.get('instruction', '')
            output = example.get('output', '')
            context_parts.append(f"Contoh {i}:")
            context_parts.append(f"Pertanyaan: {instruction}")
            context_parts.append(f"Jawaban: {output}")
            context_parts.append("")
        
        context_parts.append("Berdasarkan contoh di atas, jawab pertanyaan berikut:")
        context_parts.append(f"Pertanyaan: {user_query}")
        context_parts.append("Jawaban:")
        
        return "\n".join(context_parts)
    
    def generate_response(self, prompt, max_tokens=300, temperature=0.7):
        """Generate response using Llama model"""
        try:
            # Load model if not already loaded
            self.load_model()
            
            # Tokenize input
            inputs = self.tokenizer.encode(prompt, return_tensors="pt")
            
            # Generate response
            with torch.no_grad():
                outputs = self.model.generate(
                    inputs,
                    max_new_tokens=max_tokens,
                    temperature=temperature,
                    do_sample=True,
                    pad_token_id=self.tokenizer.eos_token_id,
                    eos_token_id=self.tokenizer.eos_token_id,
                    repetition_penalty=1.1
                )
            
            # Decode response
            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            
            # Remove input prompt from response
            if prompt in response:
                response = response.replace(prompt, "").strip()
            
            return response
            
        except Exception as e:
            logger.error(f"Error generating response: {e}")
            return f"Error generating response: {str(e)}"
    
    def chat(self, message, max_tokens=300, temperature=0.7, system_prompt_override=None):
        """Generate response using Llama with RAG context"""
        try:
            # Find relevant context
            relevant_examples = self.find_relevant_context(message, 3)
            
            # Create enhanced prompt
            if relevant_examples:
                enhanced_prompt = self.create_context_prompt(message, relevant_examples)
                context_used = True
            else:
                enhanced_prompt = message
                context_used = False
            
            # Add system prompt
            system_prompt = system_prompt_override or self.system_prompt
            full_prompt = f"System: {system_prompt}\n\nUser: {enhanced_prompt}\n\nAssistant:"
            
            # Generate response
            response = self.generate_response(full_prompt, max_tokens, temperature)
            
            return {
                "success": True,
                "response": response,
                "context_used": context_used,
                "relevant_examples_count": len(relevant_examples),
                "model": "llama-3.2-1b-instruct",
                "tokens_used": len(response.split())  # Approximate token count
            }
            
        except Exception as e:
            logger.error(f"Error in chat: {e}")
            return {
                "success": False,
                "error": f"Chat error: {str(e)}"
            }

# Initialize AI (lazy loading)
ai = None

def get_ai_assistant():
    """Get or create the AI assistant instance"""
    global ai
    if ai is None:
        try:
            logger.info("Initializing Textilindo AI Assistant...")
            ai = TextilindoAI()
            logger.info("AI Assistant initialized successfully")
        except Exception as e:
            logger.error(f"Failed to initialize AI Assistant: {e}")
            # Create a minimal fallback
            ai = type('FallbackAI', (), {
                'dataset': [],
                'chat': lambda self, message, **kwargs: {
                    "success": False,
                    "error": f"AI Assistant is not available. Error: {str(e)}"
                }
            })()
    return ai

@app.route('/health', methods=['GET'])
def health_check():
    """Health check endpoint"""
    try:
        ai_assistant = get_ai_assistant()
        return jsonify({
            "status": "healthy",
            "service": "Textilindo AI API (Llama-based)",
            "model": "llama-3.2-1b-instruct",
            "dataset_loaded": len(ai_assistant.dataset) > 0,
            "dataset_size": len(ai_assistant.dataset)
        })
    except Exception as e:
        return jsonify({
            "status": "error",
            "error": str(e)
        }), 500

@app.route('/chat', methods=['POST'])
def chat():
    """Main chat endpoint"""
    try:
        data = request.get_json()
        
        if not data:
            return jsonify({
                "success": False,
                "error": "No JSON data provided"
            }), 400
        
        message = data.get('message', '').strip()
        if not message:
            return jsonify({
                "success": False,
                "error": "Message is required"
            }), 400
        
        # Optional parameters
        max_tokens = data.get('max_tokens', 300)
        temperature = data.get('temperature', 0.7)
        system_prompt = data.get('system_prompt')
        
        # Validate parameters
        if not isinstance(max_tokens, int) or max_tokens < 1 or max_tokens > 1000:
            return jsonify({
                "success": False,
                "error": "max_tokens must be between 1 and 1000"
            }), 400
        
        if not isinstance(temperature, (int, float)) or temperature < 0 or temperature > 2:
            return jsonify({
                "success": False,
                "error": "temperature must be between 0 and 2"
            }), 400
        
        # Get AI assistant and process chat
        ai_assistant = get_ai_assistant()
        result = ai_assistant.chat(message, max_tokens, temperature, system_prompt_override=system_prompt)
        
        if result["success"]:
            return jsonify(result)
        else:
            return jsonify(result), 500
            
    except Exception as e:
        logger.error(f"Error in chat endpoint: {e}")
        return jsonify({
            "success": False,
            "error": f"Internal server error: {str(e)}"
        }), 500

@app.route('/stats', methods=['GET'])
def get_stats():
    """Get dataset and system statistics"""
    try:
        ai_assistant = get_ai_assistant()
        topics = {}
        for example in ai_assistant.dataset:
            metadata = example.get('metadata', {})
            topic = metadata.get('topic', 'unknown')
            topics[topic] = topics.get(topic, 0) + 1
        
        return jsonify({
            "success": True,
            "dataset": {
                "total_examples": len(ai_assistant.dataset),
                "topics": topics,
                "topics_count": len(topics)
            },
            "model": {
                "name": "llama-3.2-1b-instruct",
                "type": "Local Llama with LoRA"
            },
            "system": {
                "api_version": "1.0.0",
                "status": "operational"
            }
        })
        
    except Exception as e:
        logger.error(f"Error in stats endpoint: {e}")
        return jsonify({
            "success": False,
            "error": f"Internal server error: {str(e)}"
        }), 500

@app.route('/examples', methods=['GET'])
def get_examples():
    """Get sample questions from dataset"""
    try:
        ai_assistant = get_ai_assistant()
        limit = request.args.get('limit', 10, type=int)
        limit = min(limit, 50)  # Max 50 examples
        
        examples = []
        for example in ai_assistant.dataset[:limit]:
            examples.append({
                "instruction": example.get('instruction', ''),
                "output": example.get('output', ''),
                "topic": example.get('metadata', {}).get('topic', 'unknown')
            })
        
        return jsonify({
            "success": True,
            "examples": examples,
            "total_returned": len(examples),
            "total_available": len(ai_assistant.dataset)
        })
        
    except Exception as e:
        logger.error(f"Error in examples endpoint: {e}")
        return jsonify({
            "success": False,
            "error": f"Internal server error: {str(e)}"
        }), 500

@app.route('/', methods=['GET'])
def root():
    """API root endpoint with documentation"""
    try:
        ai_assistant = get_ai_assistant()
        return jsonify({
            "service": "Textilindo AI API (Llama-based)",
            "version": "1.0.0",
            "description": "AI-powered customer service for Textilindo using Llama 3.2 1B with LoRA",
            "endpoints": {
                "GET /": "API documentation (this endpoint)",
                "GET /health": "Health check",
                "POST /chat": "Chat with AI",
                "GET /stats": "Dataset and system statistics",
                "GET /examples": "Sample questions from dataset"
            },
            "usage": {
                "chat": {
                    "method": "POST",
                    "url": "/chat",
                    "body": {
                        "message": "string (required)",
                        "max_tokens": "integer (optional, default: 300)",
                        "temperature": "float (optional, default: 0.7)"
                    }
                }
            },
            "model": "llama-3.2-1b-instruct",
            "dataset_size": len(ai_assistant.dataset)
        })
    except Exception as e:
        return jsonify({
            "success": False,
            "error": f"Internal server error: {str(e)}"
        }), 500

if __name__ == '__main__':
    logger.info("Starting Textilindo AI API Server (Llama-based)...")
    
    # Try to initialize AI assistant early to catch any issues
    try:
        ai_assistant = get_ai_assistant()
        logger.info(f"Dataset loaded: {len(ai_assistant.dataset)} examples")
    except Exception as e:
        logger.warning(f"AI Assistant initialization failed: {e}")
        logger.info("Continuing with fallback mode...")
    
    # Get port from environment variable (for Hugging Face Spaces)
    port = int(os.environ.get('PORT', 7860))
    
    app.run(
        debug=False,
        host='0.0.0.0',
        port=port
    )