# AumCore-AI / language_detector.py
# Hugging Face file view: commit cebfe45 (verified), "Update language_detector.py", 7.08 kB
# language_detector.py - UPDATED FOR CODE FORMATTING (200+ lines)
from langdetect import detect, DetectorFactory
import re
DetectorFactory.seed = 0
def detect_input_language(text):
    """Classify *text* as ``'hindi'``, ``'english'`` or ``'mixed'``.

    The decision is made in two stages:

    1. Script check: if *text* contains any Devanagari code point
       (U+0900-U+097F) the answer is ``'mixed'`` when ASCII letters are also
       present and ``'hindi'`` otherwise. The statistical detector is not
       consulted here because its answer could not change the result.
    2. Detector check: otherwise ``langdetect.detect`` is run on the text
       with punctuation/symbols removed; ``'hi'`` maps to ``'hindi'``,
       ``'en'`` to ``'english'`` and anything else to ``'mixed'``.

    Args:
        text: The user input to classify.

    Returns:
        One of ``'hindi'``, ``'english'`` or ``'mixed'``. ``'mixed'`` is also
        the best-effort fallback for non-string input, input with no word
        characters, and any failure inside the detector. This function does
        not raise for bad input.
    """
    # Non-string input (None, bytes, ...) cannot be classified.
    if not isinstance(text, str):
        return 'mixed'

    # Strip punctuation/symbols; if nothing is left there is nothing to detect.
    clean_text = re.sub(r'[^\w\s]', '', text)
    if not clean_text.strip():
        return 'mixed'

    # Presence tests only - no need to collect every matching character.
    has_devanagari = re.search(r'[\u0900-\u097F]', text) is not None
    has_latin = re.search(r'[a-zA-Z]', text) is not None

    # Devanagari present: the outcome depends solely on the scripts seen.
    if has_devanagari:
        return 'mixed' if has_latin else 'hindi'

    try:
        lang = detect(clean_text)
    except Exception:
        # Deliberate best-effort: a detector failure must not break the chat
        # flow. Unlike a bare ``except``, this still lets KeyboardInterrupt
        # and SystemExit propagate.
        return 'mixed'

    if lang == 'hi':
        return 'hindi'
    if lang == 'en':
        return 'english'
    return 'mixed'
def get_system_prompt(lang_mode, username):
    """Build the full system prompt for the assistant.

    The prompt is assembled from three parts, in order: the core rules
    (which embed the fixed system-knowledge text and *username*), the
    language-style section selected by *lang_mode*, and a closing reminder.

    Args:
        lang_mode: ``'hindi'``, ``'english'`` or ``'mixed'``. Any other value
            selects the ``'mixed'`` style section.
        username: Name interpolated into the rules, examples and reminder.

    Returns:
        The assembled prompt with leading/trailing whitespace stripped.
    """
    # Fixed facts about the deployment; contains no placeholders.
    system_facts = """AUMCORE AI SYSTEM INFORMATION (NEVER FORGET):
1. ENDPOINTS: /system/status, /system/task, /reset, /chat, /
2. /system/status → AumCoreMaster orchestrator status check
3. /system/task → Heavy computational tasks
4. URL: https://aumcoreai-aumcore-ai.hf.space
5. Version: 2.0.1-Stable, Logs: logs/aumcore_main.log
6. When asked about system, ALWAYS mention these endpoints"""

    # Behavioural rules shared by every language mode.
    rules_section = f"""
ROLE: AumCore AI - Senior Coding Assistant
USER: {username}
CRITICAL RULES:
1. CODE vs CHAT DECISION:
- CODE WHEN: User says 'code', 'program', 'script', 'function', 'create', 'build'
- CHAT WHEN: General conversation, greetings, knowledge questions
- EXAMPLES:
* "google drive code" → CODE IN MARKDOWN BLOCKS
* "hello how are you" → TEXT RESPONSE
* "koi bhajan aata hai" → TEXT RESPONSE
2. CODE FORMATTING (CRITICAL):
- ALWAYS use markdown code blocks for Python code
- FORMAT: ```python\\ncode here\\n```
- NEVER output raw code without markdown blocks
- EXAMPLES:
✅ CORRECT: ```python\\nfrom google.colab import drive\\ndrive.mount('/content/gdrive')\\n```
❌ WRONG: from google.colab import drive
3. ERROR HANDLING:
- If user shows error, analyze and provide corrected code IN MARKDOWN
- Include brief explanation before/after code block
4. CODE QUALITY:
- Production-ready code
- Error handling included
- Proper structure
5. RESPONSE STRUCTURE:
- Code responses: Explanation (if needed) + Markdown code block
- Chat responses: Plain text only
- Error fixes: Error analysis + Corrected code in markdown
6. SYSTEM KNOWLEDGE: {system_facts}
"""

    # One style section per language mode, each with worked examples.
    hindi_style = f"""
STYLE: 100% Hindi (code markdown blocks ke alawa)
EXAMPLES:
- User: "नमस्ते, कोड बताओ"
You: ```python\\nfrom google.colab import drive\\ndrive.mount('/content/gdrive')\\n```
- User: "system status check करो"
You: "/system/status endpoint से AumCoreMaster orchestrator की status check करें"
- User: "क्या हाल है"
You: "सब ठीक है {username} भाई!"
- User: "endpoints बताओ"
You: "/system/status, /system/task, /reset, /chat, / - ये सभी endpoints हैं"
"""
    english_style = f"""
STYLE: 100% English (except code in markdown blocks)
EXAMPLES:
- User: "hello, give code"
You: ```python\\nfrom google.colab import drive\\ndrive.mount('/content/gdrive')\\n```
- User: "check system orchestrator status"
You: "Use /system/status endpoint to check AumCoreMaster orchestrator"
- User: "how are you"
You: "I'm good {username}!"
- User: "list endpoints"
You: "Endpoints: /system/status, /system/task, /reset, /chat, /"
"""
    mixed_style = f"""
STYLE: 60% English + 40% Hindi (natural blend)
EXAMPLES:
- User: "hi bhai, code de"
You: ```python\\nfrom google.colab import drive\\ndrive.mount('/content/gdrive')\\n```
- User: "system ki status bata"
You: "/system/status endpoint se system orchestrator ki status check karo"
- User: "are yaar, kya haal hai"
You: "Sab badhiya hai {username} bhai!"
- User: "endpoints batana"
You: "Endpoints hai: /system/status, /system/task, /reset, /chat, /"
"""
    style_by_mode = {
        'hindi': hindi_style,
        'english': english_style,
        'mixed': mixed_style,
    }
    # Unknown modes fall back to the blended style.
    chosen_style = style_by_mode.get(lang_mode, mixed_style)

    # Closing reminder repeated at the very end of the prompt.
    closing_section = f"""FINAL REMINDER:
- You are {username}'s helpful AI assistant
- FOR CODE: ALWAYS use ```python\\ncode\\n``` format
- KNOW YOUR SYSTEM: AumCore-AI with specific endpoints
- When asked about system, REFER to /system/status and /system/task
"""

    assembled = "\n".join((rules_section, chosen_style, closing_section))
    return assembled.strip()
# SIMPLE CODE GENERATOR WITH MARKDOWN
def generate_basic_code(task):
    """Return a Markdown-fenced Python template chosen by keywords in *task*.

    Matching is a case-insensitive substring search, evaluated in this
    priority order:

    1. ``'drive'`` or ``'mount'``  -> Google Drive mount snippet
    2. ``'web'`` or ``'app'``      -> minimal FastAPI application
    3. ``'data'`` or ``'analy'``   -> pandas CSV summary

    Args:
        task: Free-text description of what the user wants. ``None`` or an
            empty string is treated as "no keywords".

    Returns:
        A string starting with the ``python`` code-fence opener and ending
        with the closing fence. When no keyword matches, the Google Drive
        snippet is returned as the default.
    """
    # Template bodies are emitted verbatim to the user, so they must be
    # valid Python on their own (hence the explicit indentation inside).
    templates = {
        'web': """```python
from fastapi import FastAPI
import uvicorn
app = FastAPI()
@app.get("/")
def home():
    return {"message": "Hello from AumCore AI"}
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
```""",
        'data': """```python
import pandas as pd
import numpy as np
# Load data
df = pd.read_csv("data.csv")
# Basic analysis
print(f"Shape: {df.shape}")
print(f"Columns: {list(df.columns)}")
print(f"Summary:\\n{df.describe()}")
```""",
        'drive': """```python
from google.colab import drive
drive.mount('/content/gdrive')
```""",
    }

    # Ordered routing table: the first entry with a matching keyword wins.
    keyword_routes = (
        (('drive', 'mount'), 'drive'),
        (('web', 'app'), 'web'),
        (('data', 'analy'), 'data'),
    )

    # ``task or ""`` lets None / empty input fall through to the default
    # instead of raising AttributeError on ``.lower()``.
    task_lower = (task or "").lower()
    for keywords, template_key in keyword_routes:
        if any(keyword in task_lower for keyword in keywords):
            return templates[template_key]

    return templates['drive']  # Default
# Manual smoke test - runs only when the module is executed directly
if __name__ == "__main__":
    # Run language detection over a few sample inputs and print each result
    samples = (
        "नमस्ते",
        "hello world",
        "hi bhai kya haal hai",
        "google drive mount code do",
    )
    for sample in samples:
        detected = detect_input_language(sample)
        # Input is truncated/padded to 20 columns so the arrows line up
        print(f"{sample[:20]:20} -> {detected}")

    # Show the first 100 characters of a generated markdown code template
    snippet = generate_basic_code("google drive")
    print(f"\nGenerated code sample:\n{snippet[:100]}...")

    # Closing status banner
    banner_lines = (
        "\n✅ language_detector.py UPDATED for markdown formatting",
        " AI will now output code in ```python blocks",
        " This enables: Code boxes, Copy button, Syntax highlighting",
    )
    for banner_line in banner_lines:
        print(banner_line)