Spaces:

harismlnaslm
/

Textilindo-2

Sleeping

App Files Files Community

harismlnaslm committed on Oct 24, 2025

Commit

60664bc

1 Parent(s): cb197a6

Fix Hugging Face Spaces compatibility: Use standard app.py and proper routing

Browse files

Files changed (2) hide show

Dockerfile +1 -1
app.py +376 -0

Dockerfile CHANGED Viewed

@@ -29,4 +29,4 @@ USER user
 EXPOSE 7860
 # Run the application
-CMD ["python", "api_server_simple.py"]

 EXPOSE 7860
 # Run the application
+CMD ["python", "app.py"]

app.py ADDED Viewed

	@@ -0,0 +1,376 @@

+#!/usr/bin/env python3
+"""
+Textilindo AI API Server - Hugging Face Spaces Compatible
+Uses dataset-based similarity matching without heavy ML dependencies
+"""
+from flask import Flask, request, jsonify
+from flask_cors import CORS
+import os
+import json
+from difflib import SequenceMatcher
+import logging
+# Setup logging: configure the root logger at INFO and create this module's logger.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Flask application object; the @app.route handlers below register on it.
+app = Flask(__name__)
+CORS(app)  # Enable CORS for all routes
+def load_system_prompt(default_text):
+    try:
+        base_dir = os.path.dirname(__file__)
+        md_path = os.path.join(base_dir, 'configs', 'system_prompt.md')
+        if not os.path.exists(md_path):
+            return default_text
+        with open(md_path, 'r', encoding='utf-8') as f:
+            content = f.read()
+        start = content.find('"""')
+        end = content.rfind('"""')
+        if start != -1 and end != -1 and end > start:
+            return content[start+3:end].strip()
+        lines = []
+        for line in content.splitlines():
+            if line.strip().startswith('#'):
+                continue
+            lines.append(line)
+        cleaned = '\n'.join(lines).strip()
+        return cleaned or default_text
+    except Exception:
+        return default_text
+class TextilindoAI:
+    def __init__(self):
+        self.system_prompt = os.getenv(
+            'SYSTEM_PROMPT',
+            load_system_prompt("You are Textilindo AI Assistant. Be concise, helpful, and use Indonesian.")
+        )
+        self.dataset = self.load_all_datasets()
+    def load_all_datasets(self):
+        """Load all available datasets"""
+        dataset = []
+        # Try multiple possible data directory paths
+        possible_data_dirs = [
+            "data",
+            "./data",
+            "/app/data",
+            os.path.join(os.path.dirname(__file__), "data")
+        ]
+        data_dir = None
+        for dir_path in possible_data_dirs:
+            if os.path.exists(dir_path):
+                data_dir = dir_path
+                logger.info(f"Found data directory: {data_dir}")
+                break
+        if not data_dir:
+            logger.warning("No data directory found in any of the expected locations")
+            return dataset
+        # Load all JSONL files
+        try:
+            for filename in os.listdir(data_dir):
+                if filename.endswith('.jsonl'):
+                    filepath = os.path.join(data_dir, filename)
+                    try:
+                        with open(filepath, 'r', encoding='utf-8') as f:
+                            for line_num, line in enumerate(f, 1):
+                                line = line.strip()
+                                if line:
+                                    try:
+                                        data = json.loads(line)
+                                        dataset.append(data)
+                                    except json.JSONDecodeError as e:
+                                        logger.warning(f"Invalid JSON in {filename} line {line_num}: {e}")
+                                        continue
+                        logger.info(f"Loaded {filename}: {len([d for d in dataset if d.get('instruction')])} examples")
+                    except Exception as e:
+                        logger.error(f"Error loading {filename}: {e}")
+        except Exception as e:
+            logger.error(f"Error reading data directory {data_dir}: {e}")
+        logger.info(f"Total examples loaded: {len(dataset)}")
+        return dataset
+    def find_relevant_context(self, user_query, top_k=3):
+        """Find most relevant examples from dataset"""
+        if not self.dataset:
+            return []
+        scores = []
+        for i, example in enumerate(self.dataset):
+            instruction = example.get('instruction', '').lower()
+            output = example.get('output', '').lower()
+            query = user_query.lower()
+            instruction_score = SequenceMatcher(None, query, instruction).ratio()
+            output_score = SequenceMatcher(None, query, output).ratio()
+            combined_score = (instruction_score * 0.7) + (output_score * 0.3)
+            scores.append((combined_score, i))
+        scores.sort(reverse=True)
+        relevant_examples = []
+        for score, idx in scores[:top_k]:
+            if score > 0.1:
+                relevant_examples.append(self.dataset[idx])
+        return relevant_examples
+    def generate_response(self, user_query, relevant_examples):
+        """Generate response based on relevant examples"""
+        if not relevant_examples:
+            return "Maaf, saya tidak memiliki informasi yang cukup untuk menjawab pertanyaan Anda. Silakan hubungi Textilindo langsung untuk informasi lebih lanjut."
+        # Find the most relevant example
+        best_example = relevant_examples[0]
+        best_answer = best_example.get('output', '')
+        if best_answer:
+            return f"Berdasarkan informasi yang tersedia: {best_answer}"
+        else:
+            return "Saya menemukan beberapa informasi terkait, tetapi tidak dapat memberikan jawaban yang tepat. Silakan coba rephrasing pertanyaan Anda."
+    def chat(self, message, max_tokens=300, temperature=0.7, system_prompt_override=None):
+        """Generate response using RAG context"""
+        try:
+            # Find relevant context
+            relevant_examples = self.find_relevant_context(message, 3)
+            # Generate response
+            response = self.generate_response(message, relevant_examples)
+            return {
+                "success": True,
+                "response": response,
+                "context_used": len(relevant_examples) > 0,
+                "relevant_examples_count": len(relevant_examples),
+                "model": "textilindo-rag",
+                "tokens_used": len(response.split())  # Approximate token count
+            }
+        except Exception as e:
+            logger.error(f"Error in chat: {e}")
+            return {
+                "success": False,
+                "error": f"Chat error: {str(e)}"
+            }
+# Initialize AI (lazy loading)
+ai = None
+def get_ai_assistant():
+    """Get or create the AI assistant instance"""
+    global ai
+    if ai is None:
+        try:
+            logger.info("Initializing Textilindo AI Assistant...")
+            ai = TextilindoAI()
+            logger.info("AI Assistant initialized successfully")
+        except Exception as e:
+            logger.error(f"Failed to initialize AI Assistant: {e}")
+            # Create a minimal fallback
+            ai = type('FallbackAI', (), {
+                'dataset': [],
+                'chat': lambda self, message, **kwargs: {
+                    "success": False,
+                    "error": f"AI Assistant is not available. Error: {str(e)}"
+                }
+            })()
+    return ai
+@app.route('/health', methods=['GET'])
+def health_check():
+    """Health check endpoint"""
+    try:
+        ai_assistant = get_ai_assistant()
+        return jsonify({
+            "status": "healthy",
+            "service": "Textilindo AI API (RAG-based)",
+            "model": "textilindo-rag",
+            "dataset_loaded": len(ai_assistant.dataset) > 0,
+            "dataset_size": len(ai_assistant.dataset)
+        })
+    except Exception as e:
+        return jsonify({
+            "status": "error",
+            "error": str(e)
+        }), 500
+@app.route('/chat', methods=['POST'])
+def chat():
+    """Main chat endpoint"""
+    try:
+        data = request.get_json()
+        if not data:
+            return jsonify({
+                "success": False,
+                "error": "No JSON data provided"
+            }), 400
+        message = data.get('message', '').strip()
+        if not message:
+            return jsonify({
+                "success": False,
+                "error": "Message is required"
+            }), 400
+        # Optional parameters
+        max_tokens = data.get('max_tokens', 300)
+        temperature = data.get('temperature', 0.7)
+        system_prompt = data.get('system_prompt')
+        # Validate parameters
+        if not isinstance(max_tokens, int) or max_tokens < 1 or max_tokens > 1000:
+            return jsonify({
+                "success": False,
+                "error": "max_tokens must be between 1 and 1000"
+            }), 400
+        if not isinstance(temperature, (int, float)) or temperature < 0 or temperature > 2:
+            return jsonify({
+                "success": False,
+                "error": "temperature must be between 0 and 2"
+            }), 400
+        # Get AI assistant and process chat
+        ai_assistant = get_ai_assistant()
+        result = ai_assistant.chat(message, max_tokens, temperature, system_prompt_override=system_prompt)
+        if result["success"]:
+            return jsonify(result)
+        else:
+            return jsonify(result), 500
+    except Exception as e:
+        logger.error(f"Error in chat endpoint: {e}")
+        return jsonify({
+            "success": False,
+            "error": f"Internal server error: {str(e)}"
+        }), 500
+@app.route('/stats', methods=['GET'])
+def get_stats():
+    """Get dataset and system statistics"""
+    try:
+        ai_assistant = get_ai_assistant()
+        topics = {}
+        for example in ai_assistant.dataset:
+            metadata = example.get('metadata', {})
+            topic = metadata.get('topic', 'unknown')
+            topics[topic] = topics.get(topic, 0) + 1
+        return jsonify({
+            "success": True,
+            "dataset": {
+                "total_examples": len(ai_assistant.dataset),
+                "topics": topics,
+                "topics_count": len(topics)
+            },
+            "model": {
+                "name": "textilindo-rag",
+                "type": "RAG-based similarity matching"
+            },
+            "system": {
+                "api_version": "1.0.0",
+                "status": "operational"
+            }
+        })
+    except Exception as e:
+        logger.error(f"Error in stats endpoint: {e}")
+        return jsonify({
+            "success": False,
+            "error": f"Internal server error: {str(e)}"
+        }), 500
+@app.route('/examples', methods=['GET'])
+def get_examples():
+    """Get sample questions from dataset"""
+    try:
+        ai_assistant = get_ai_assistant()
+        limit = request.args.get('limit', 10, type=int)
+        limit = min(limit, 50)  # Max 50 examples
+        examples = []
+        for example in ai_assistant.dataset[:limit]:
+            examples.append({
+                "instruction": example.get('instruction', ''),
+                "output": example.get('output', ''),
+                "topic": example.get('metadata', {}).get('topic', 'unknown')
+            })
+        return jsonify({
+            "success": True,
+            "examples": examples,
+            "total_returned": len(examples),
+            "total_available": len(ai_assistant.dataset)
+        })
+    except Exception as e:
+        logger.error(f"Error in examples endpoint: {e}")
+        return jsonify({
+            "success": False,
+            "error": f"Internal server error: {str(e)}"
+        }), 500
+@app.route('/', methods=['GET'])
+def root():
+    """API root endpoint with documentation"""
+    try:
+        ai_assistant = get_ai_assistant()
+        return jsonify({
+            "service": "Textilindo AI API (RAG-based)",
+            "version": "1.0.0",
+            "description": "AI-powered customer service for Textilindo using RAG similarity matching",
+            "endpoints": {
+                "GET /": "API documentation (this endpoint)",
+                "GET /health": "Health check",
+                "POST /chat": "Chat with AI",
+                "GET /stats": "Dataset and system statistics",
+                "GET /examples": "Sample questions from dataset"
+            },
+            "usage": {
+                "chat": {
+                    "method": "POST",
+                    "url": "/chat",
+                    "body": {
+                        "message": "string (required)",
+                        "max_tokens": "integer (optional, default: 300)",
+                        "temperature": "float (optional, default: 0.7)"
+                    }
+                }
+            },
+            "model": "textilindo-rag",
+            "dataset_size": len(ai_assistant.dataset)
+        })
+    except Exception as e:
+        return jsonify({
+            "success": False,
+            "error": f"Internal server error: {str(e)}"
+        }), 500
+if __name__ == '__main__':
+    logger.info("Starting Textilindo AI API Server (RAG-based)...")
+    # Try to initialize AI assistant early to catch any issues
+    try:
+        ai_assistant = get_ai_assistant()
+        logger.info(f"Dataset loaded: {len(ai_assistant.dataset)} examples")
+    except Exception as e:
+        logger.warning(f"AI Assistant initialization failed: {e}")
+        logger.info("Continuing with fallback mode...")
+    # Get port from environment variable (for Hugging Face Spaces)
+    port = int(os.environ.get('PORT', 7860))
+    app.run(
+        debug=False,
+        host='0.0.0.0',
+        port=port
+    )