Spaces:

AumCoreAI
/

AumCore-AI

Sleeping

App Files Files Community

AumCore-AI / language_detector.py

AumCoreAI

Update language_detector.py

cebfe45 verified 4 months ago

raw

history blame

7.08 kB

	# language_detector.py - UPDATED FOR CODE FORMATTING (200+ lines)
	from langdetect import detect, DetectorFactory
	import re

	# Pin langdetect's seed so repeated detection of the same text yields the
	# same language code (the library's detector is otherwise non-deterministic).
	DetectorFactory.seed = 0

	def detect_input_language(text):
	    """Classify *text* as ``'hindi'``, ``'english'`` or ``'mixed'``.

	    Classification is script-first: any Devanagari character decides the
	    answer without consulting the statistical detector, so a detector
	    failure can no longer turn clearly-Hindi input into ``'mixed'``.
	    Only text without Devanagari is passed to ``langdetect``.

	    Args:
	        text: The user input to classify.

	    Returns:
	        ``'hindi'`` for Devanagari-only text (or when the detector reports
	        ``'hi'``), ``'english'`` when the detector reports ``'en'``, and
	        ``'mixed'`` for everything else, including empty, punctuation-only
	        or non-string input and any detector failure.
	    """
	    # Best-effort contract: this function never raises; non-text is 'mixed'.
	    if not isinstance(text, str):
	        return 'mixed'

	    # Drop punctuation/symbols; nothing left means nothing to classify.
	    clean_text = re.sub(r'[^\w\s]', '', text)
	    if not clean_text.strip():
	        return 'mixed'

	    # Script check on the raw text: Devanagari present => Hindi, or mixed
	    # when Latin letters appear alongside it.
	    if re.search(r'[\u0900-\u097F]', text):
	        if re.search(r'[a-zA-Z]', text):
	            return 'mixed'
	        return 'hindi'

	    # No Devanagari: fall back to the statistical detector. Keep the try
	    # body minimal and avoid a bare ``except`` so KeyboardInterrupt and
	    # SystemExit still propagate.
	    try:
	        lang = detect(clean_text)
	    except Exception:
	        return 'mixed'

	    if lang == 'hi':
	        return 'hindi'
	    if lang == 'en':
	        return 'english'
	    return 'mixed'

	def get_system_prompt(lang_mode, username):
	    """Build the complete system prompt for one user and language mode.

	    The prompt is the core rule set (which embeds the fixed system
	    information), followed by the style guide for ``lang_mode`` and a
	    closing reminder. Surrounding whitespace is stripped from the result.

	    Args:
	        lang_mode: ``'hindi'``, ``'english'`` or ``'mixed'``; any other
	            value selects the ``'mixed'`` style guide.
	        username: Name interpolated into the rules, examples and reminder.

	    Returns:
	        The assembled prompt string.
	    """
	    # Fixed system facts; contains no placeholders, so a plain string.
	    system_facts = """AUMCORE AI SYSTEM INFORMATION (NEVER FORGET):
	1. ENDPOINTS: /system/status, /system/task, /reset, /chat, /
	2. /system/status → AumCoreMaster orchestrator status check
	3. /system/task → Heavy computational tasks
	4. URL: https://aumcoreai-aumcore-ai.hf.space
	5. Version: 2.0.1-Stable, Logs: logs/aumcore_main.log
	6. When asked about system, ALWAYS mention these endpoints"""

	    # Rules shared by every language mode.
	    rules = f"""
	ROLE: AumCore AI - Senior Coding Assistant
	USER: {username}

	CRITICAL RULES:
	1. CODE vs CHAT DECISION:
	- CODE WHEN: User says 'code', 'program', 'script', 'function', 'create', 'build'
	- CHAT WHEN: General conversation, greetings, knowledge questions
	- EXAMPLES:
	* "google drive code" → CODE IN MARKDOWN BLOCKS
	* "hello how are you" → TEXT RESPONSE
	* "koi bhajan aata hai" → TEXT RESPONSE

	2. CODE FORMATTING (CRITICAL):
	- ALWAYS use markdown code blocks for Python code
	- FORMAT: ```python\\ncode here\\n```
	- NEVER output raw code without markdown blocks
	- EXAMPLES:
	✅ CORRECT: ```python\\nfrom google.colab import drive\\ndrive.mount('/content/gdrive')\\n```
	❌ WRONG: from google.colab import drive

	3. ERROR HANDLING:
	- If user shows error, analyze and provide corrected code IN MARKDOWN
	- Include brief explanation before/after code block

	4. CODE QUALITY:
	- Production-ready code
	- Error handling included
	- Proper structure

	5. RESPONSE STRUCTURE:
	- Code responses: Explanation (if needed) + Markdown code block
	- Chat responses: Plain text only
	- Error fixes: Error analysis + Corrected code in markdown

	6. SYSTEM KNOWLEDGE: {system_facts}
	"""

	    # One style guide per supported language mode.
	    style_guides = {
	        'hindi': f"""
	STYLE: 100% Hindi (code markdown blocks ke alawa)
	EXAMPLES:
	- User: "नमस्ते, कोड बताओ"
	You: ```python\\nfrom google.colab import drive\\ndrive.mount('/content/gdrive')\\n```
	- User: "system status check करो"
	You: "/system/status endpoint से AumCoreMaster orchestrator की status check करें"
	- User: "क्या हाल है"
	You: "सब ठीक है {username} भाई!"
	- User: "endpoints बताओ"
	You: "/system/status, /system/task, /reset, /chat, / - ये सभी endpoints हैं"
	""",

	        'english': f"""
	STYLE: 100% English (except code in markdown blocks)
	EXAMPLES:
	- User: "hello, give code"
	You: ```python\\nfrom google.colab import drive\\ndrive.mount('/content/gdrive')\\n```
	- User: "check system orchestrator status"
	You: "Use /system/status endpoint to check AumCoreMaster orchestrator"
	- User: "how are you"
	You: "I'm good {username}!"
	- User: "list endpoints"
	You: "Endpoints: /system/status, /system/task, /reset, /chat, /"
	""",

	        'mixed': f"""
	STYLE: 60% English + 40% Hindi (natural blend)
	EXAMPLES:
	- User: "hi bhai, code de"
	You: ```python\\nfrom google.colab import drive\\ndrive.mount('/content/gdrive')\\n```
	- User: "system ki status bata"
	You: "/system/status endpoint se system orchestrator ki status check karo"
	- User: "are yaar, kya haal hai"
	You: "Sab badhiya hai {username} bhai!"
	- User: "endpoints batana"
	You: "Endpoints hai: /system/status, /system/task, /reset, /chat, /"
	"""
	    }

	    # Unrecognised modes use the mixed-language guide.
	    selected_style = style_guides.get(lang_mode, style_guides['mixed'])

	    closing_reminder = f"""FINAL REMINDER:
	- You are {username}'s helpful AI assistant
	- FOR CODE: ALWAYS use ```python\\ncode\\n``` format
	- KNOW YOUR SYSTEM: AumCore-AI with specific endpoints
	- When asked about system, REFER to /system/status and /system/task
	"""

	    # Join the three sections in order: rules, style guide, reminder.
	    assembled = f"""{rules}

	{selected_style}

	{closing_reminder}"""

	    return assembled.strip()

	# SIMPLE CODE GENERATOR WITH MARKDOWN
	def generate_basic_code(task):
	    """Return a canned Python snippet, wrapped in a markdown code fence.

	    The template is chosen by case-insensitive substring match on *task*,
	    checked in this order: ``'drive'``/``'mount'`` (Google Drive mount),
	    ``'web'``/``'app'`` (FastAPI app), ``'data'``/``'analy'`` (pandas
	    summary). When nothing matches, or *task* is not a string, the Google
	    Drive template is returned.

	    Args:
	        task: Free-text description of what the user wants.

	    Returns:
	        A string starting with ```` ```python ```` and ending with the
	        closing fence.
	    """
	    # The function bodies inside the 'web' template are indented so that
	    # the emitted snippet is valid Python when copied out of the fence.
	    templates = {
	        'web': """```python
	from fastapi import FastAPI
	import uvicorn

	app = FastAPI()

	@app.get("/")
	def home():
	    return {"message": "Hello from AumCore AI"}

	if __name__ == "__main__":
	    uvicorn.run(app, host="0.0.0.0", port=8000)
	```""",
	        'data': """```python
	import pandas as pd
	import numpy as np

	# Load data
	df = pd.read_csv("data.csv")

	# Basic analysis
	print(f"Shape: {df.shape}")
	print(f"Columns: {list(df.columns)}")
	print(f"Summary:\\n{df.describe()}")
	```""",
	        'drive': """```python
	from google.colab import drive
	drive.mount('/content/gdrive')
	```"""
	    }

	    # Non-string input (e.g. None) falls through to the default template
	    # instead of raising AttributeError on .lower().
	    task_lower = task.lower() if isinstance(task, str) else ''

	    # Order matters: earlier entries win when several keywords are present.
	    keyword_routes = (
	        (('drive', 'mount'), 'drive'),
	        (('web', 'app'), 'web'),
	        (('data', 'analy'), 'data'),
	    )
	    for keywords, template_key in keyword_routes:
	        if any(keyword in task_lower for keyword in keywords):
	            return templates[template_key]

	    return templates['drive']  # Default

	# Test function
	if __name__ == "__main__":
	    # Smoke-test language detection on one sample per expected category.
	    samples = (
	        "नमस्ते",
	        "hello world",
	        "hi bhai kya haal hai",
	        "google drive mount code do",
	    )
	    for sample in samples:
	        detected = detect_input_language(sample)
	        # Show at most 20 characters of the sample, padded to width 20.
	        print(f"{sample[:20]:20} -> {detected}")

	    # Smoke-test template generation; only the first 100 characters are shown.
	    snippet = generate_basic_code("google drive")
	    print(f"\nGenerated code sample:\n{snippet[:100]}...")

	    print("\n✅ language_detector.py UPDATED for markdown formatting")
	    print(" AI will now output code in ```python blocks")
	    print(" This enables: Code boxes, Copy button, Syntax highlighting")