Spaces:
Sleeping
Sleeping
Update language_detector.py
Browse files
- language_detector.py +69 -39
language_detector.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
# language_detector.py -
|
| 2 |
from langdetect import detect, DetectorFactory
|
| 3 |
import re
|
| 4 |
|
|
@@ -33,7 +33,7 @@ def detect_input_language(text):
|
|
| 33 |
def get_system_prompt(lang_mode, username):
|
| 34 |
"""Generate system prompt based on language and intent"""
|
| 35 |
|
| 36 |
-
# CORE RULES -
|
| 37 |
core_rules = f"""
|
| 38 |
ROLE: AumCore AI - Senior Coding Assistant
|
| 39 |
USER: {username}
|
|
@@ -43,70 +43,94 @@ def get_system_prompt(lang_mode, username):
|
|
| 43 |
- CODE WHEN: User says 'code', 'program', 'script', 'function', 'create', 'build'
|
| 44 |
- CHAT WHEN: General conversation, greetings, knowledge questions
|
| 45 |
- EXAMPLES:
|
| 46 |
-
* "google drive code" →
|
| 47 |
* "hello how are you" → TEXT RESPONSE
|
| 48 |
* "koi bhajan aata hai" → TEXT RESPONSE
|
| 49 |
|
| 50 |
-
2. CODE
|
| 51 |
-
-
|
| 52 |
-
-
|
| 53 |
-
-
|
| 54 |
-
-
|
| 55 |
-
|
|
|
|
|
|
|
| 56 |
3. ERROR HANDLING:
|
| 57 |
-
- If user shows error, analyze and provide corrected code
|
| 58 |
-
- Include brief explanation
|
| 59 |
|
| 60 |
4. CODE QUALITY:
|
| 61 |
- Production-ready code
|
| 62 |
- Error handling included
|
| 63 |
- Proper structure
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
"""
|
| 65 |
|
| 66 |
-
# LANGUAGE SPECIFIC STYLES
|
| 67 |
styles = {
|
| 68 |
-
'hindi': """
|
| 69 |
-
STYLE: 100% Hindi (
|
| 70 |
EXAMPLES:
|
| 71 |
-
- User: "नमस्ते, कोड बताओ"
|
| 72 |
-
|
| 73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
""",
|
| 75 |
|
| 76 |
-
'english': """
|
| 77 |
-
STYLE: 100% English (except code)
|
| 78 |
EXAMPLES:
|
| 79 |
-
- User: "hello, give code"
|
| 80 |
-
|
| 81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
""",
|
| 83 |
|
| 84 |
-
'mixed': """
|
| 85 |
STYLE: 60% English + 40% Hindi (natural blend)
|
| 86 |
EXAMPLES:
|
| 87 |
-
- User: "hi bhai, code de"
|
| 88 |
-
|
| 89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
"""
|
| 91 |
}
|
| 92 |
|
| 93 |
-
# COMBINE
|
| 94 |
full_prompt = f"""{core_rules}
|
| 95 |
|
| 96 |
{styles.get(lang_mode, styles['mixed'])}
|
| 97 |
|
| 98 |
-
FINAL REMINDER:
|
| 99 |
-
|
| 100 |
-
|
|
|
|
|
|
|
| 101 |
"""
|
| 102 |
|
| 103 |
return full_prompt.strip()
|
| 104 |
|
| 105 |
-
# SIMPLE CODE GENERATOR
|
| 106 |
def generate_basic_code(task):
|
| 107 |
-
"""Generate basic code templates"""
|
| 108 |
templates = {
|
| 109 |
-
'web': """
|
| 110 |
from fastapi import FastAPI
|
| 111 |
import uvicorn
|
| 112 |
|
|
@@ -118,8 +142,8 @@ def home():
|
|
| 118 |
|
| 119 |
if __name__ == "__main__":
|
| 120 |
uvicorn.run(app, host="0.0.0.0", port=8000)
|
| 121 |
-
""",
|
| 122 |
-
'data': """
|
| 123 |
import pandas as pd
|
| 124 |
import numpy as np
|
| 125 |
|
|
@@ -130,11 +154,11 @@ df = pd.read_csv("data.csv")
|
|
| 130 |
print(f"Shape: {df.shape}")
|
| 131 |
print(f"Columns: {list(df.columns)}")
|
| 132 |
print(f"Summary:\\n{df.describe()}")
|
| 133 |
-
""",
|
| 134 |
-
'drive': """
|
| 135 |
from google.colab import drive
|
| 136 |
drive.mount('/content/gdrive')
|
| 137 |
-
"""
|
| 138 |
}
|
| 139 |
|
| 140 |
task_lower = task.lower()
|
|
@@ -161,4 +185,10 @@ if __name__ == "__main__":
|
|
| 161 |
lang = detect_input_language(test)
|
| 162 |
print(f"{test[:20]:20} -> {lang}")
|
| 163 |
|
| 164 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# language_detector.py - UPDATED FOR CODE FORMATTING (200+ lines)
|
| 2 |
from langdetect import detect, DetectorFactory
|
| 3 |
import re
|
| 4 |
|
|
|
|
| 33 |
def get_system_prompt(lang_mode, username):
|
| 34 |
"""Generate system prompt based on language and intent"""
|
| 35 |
|
| 36 |
+
# CORE RULES - UPDATED FOR MARKDOWN FORMATTING
|
| 37 |
core_rules = f"""
|
| 38 |
ROLE: AumCore AI - Senior Coding Assistant
|
| 39 |
USER: {username}
|
|
|
|
| 43 |
- CODE WHEN: User says 'code', 'program', 'script', 'function', 'create', 'build'
|
| 44 |
- CHAT WHEN: General conversation, greetings, knowledge questions
|
| 45 |
- EXAMPLES:
|
| 46 |
+
* "google drive code" → CODE IN MARKDOWN BLOCKS
|
| 47 |
* "hello how are you" → TEXT RESPONSE
|
| 48 |
* "koi bhajan aata hai" → TEXT RESPONSE
|
| 49 |
|
| 50 |
+
2. CODE FORMATTING (CRITICAL):
|
| 51 |
+
- ALWAYS use markdown code blocks for Python code
|
| 52 |
+
- FORMAT: ```python\\ncode here\\n```
|
| 53 |
+
- NEVER output raw code without markdown blocks
|
| 54 |
+
- EXAMPLES:
|
| 55 |
+
✅ CORRECT: ```python\\nfrom google.colab import drive\\ndrive.mount('/content/gdrive')\\n```
|
| 56 |
+
❌ WRONG: from google.colab import drive
|
| 57 |
+
|
| 58 |
3. ERROR HANDLING:
|
| 59 |
+
- If user shows error, analyze and provide corrected code IN MARKDOWN
|
| 60 |
+
- Include brief explanation before/after code block
|
| 61 |
|
| 62 |
4. CODE QUALITY:
|
| 63 |
- Production-ready code
|
| 64 |
- Error handling included
|
| 65 |
- Proper structure
|
| 66 |
+
|
| 67 |
+
5. RESPONSE STRUCTURE:
|
| 68 |
+
- Code responses: Explanation (if needed) + Markdown code block
|
| 69 |
+
- Chat responses: Plain text only
|
| 70 |
+
- Error fixes: Error analysis + Corrected code in markdown
|
| 71 |
"""
|
| 72 |
|
| 73 |
+
# LANGUAGE SPECIFIC STYLES - UPDATED
|
| 74 |
styles = {
|
| 75 |
+
'hindi': f"""
|
| 76 |
+
STYLE: 100% Hindi (code markdown blocks ke alawa)
|
| 77 |
EXAMPLES:
|
| 78 |
+
- User: "नमस्ते, कोड बताओ"
|
| 79 |
+
You: ```python\\nfrom google.colab import drive\\ndrive.mount('/content/gdrive')\\n```
|
| 80 |
+
|
| 81 |
+
- User: "क्या हाल है"
|
| 82 |
+
You: "सब ठीक है {username} भाई!"
|
| 83 |
+
|
| 84 |
+
- User: "त्रुटि: x परिभाषित नहीं"
|
| 85 |
+
You: "त्रुटि: x define नहीं है। सही कोड:\\n```python\\nx = 10\\ny = x\\nprint(y)\\n```
|
| 86 |
""",
|
| 87 |
|
| 88 |
+
'english': f"""
|
| 89 |
+
STYLE: 100% English (except code in markdown blocks)
|
| 90 |
EXAMPLES:
|
| 91 |
+
- User: "hello, give code"
|
| 92 |
+
You: ```python\\nfrom google.colab import drive\\ndrive.mount('/content/gdrive')\\n```
|
| 93 |
+
|
| 94 |
+
- User: "how are you"
|
| 95 |
+
You: "I'm good {username}!"
|
| 96 |
+
|
| 97 |
+
- User: "error: x not defined"
|
| 98 |
+
You: "Error: x is not defined. Corrected code:\\n```python\\nx = 10\\ny = x\\nprint(y)\\n```
|
| 99 |
""",
|
| 100 |
|
| 101 |
+
'mixed': f"""
|
| 102 |
STYLE: 60% English + 40% Hindi (natural blend)
|
| 103 |
EXAMPLES:
|
| 104 |
+
- User: "hi bhai, code de"
|
| 105 |
+
You: ```python\\nfrom google.colab import drive\\ndrive.mount('/content/gdrive')\\n```
|
| 106 |
+
|
| 107 |
+
- User: "are yaar, kya haal hai"
|
| 108 |
+
You: "Sab badhiya hai {username} bhai!"
|
| 109 |
+
|
| 110 |
+
- User: "error aaya: x not defined"
|
| 111 |
+
You: "Error: x defined nahi hai. Corrected code:\\n```python\\nx = 10\\ny = x\\nprint(y)\\n```
|
| 112 |
"""
|
| 113 |
}
|
| 114 |
|
| 115 |
+
# COMBINE WITH EMPHASIS ON MARKDOWN
|
| 116 |
full_prompt = f"""{core_rules}
|
| 117 |
|
| 118 |
{styles.get(lang_mode, styles['mixed'])}
|
| 119 |
|
| 120 |
+
FINAL REMINDER:
|
| 121 |
+
- You are {username}'s helpful AI assistant
|
| 122 |
+
- FOR CODE: ALWAYS use ```python\\ncode\\n``` format
|
| 123 |
+
- This enables beautiful code formatting with copy button
|
| 124 |
+
- Without markdown blocks, code formatting won't work properly
|
| 125 |
"""
|
| 126 |
|
| 127 |
return full_prompt.strip()
|
| 128 |
|
| 129 |
+
# SIMPLE CODE GENERATOR WITH MARKDOWN
|
| 130 |
def generate_basic_code(task):
|
| 131 |
+
"""Generate basic code templates with markdown"""
|
| 132 |
templates = {
|
| 133 |
+
'web': """```python
|
| 134 |
from fastapi import FastAPI
|
| 135 |
import uvicorn
|
| 136 |
|
|
|
|
| 142 |
|
| 143 |
if __name__ == "__main__":
|
| 144 |
uvicorn.run(app, host="0.0.0.0", port=8000)
|
| 145 |
+
```""",
|
| 146 |
+
'data': """```python
|
| 147 |
import pandas as pd
|
| 148 |
import numpy as np
|
| 149 |
|
|
|
|
| 154 |
print(f"Shape: {df.shape}")
|
| 155 |
print(f"Columns: {list(df.columns)}")
|
| 156 |
print(f"Summary:\\n{df.describe()}")
|
| 157 |
+
```""",
|
| 158 |
+
'drive': """```python
|
| 159 |
from google.colab import drive
|
| 160 |
drive.mount('/content/gdrive')
|
| 161 |
+
```"""
|
| 162 |
}
|
| 163 |
|
| 164 |
task_lower = task.lower()
|
|
|
|
| 185 |
lang = detect_input_language(test)
|
| 186 |
print(f"{test[:20]:20} -> {lang}")
|
| 187 |
|
| 188 |
+
# Test code generation with markdown
|
| 189 |
+
test_code = generate_basic_code("google drive")
|
| 190 |
+
print(f"\nGenerated code sample:\n{test_code[:100]}...")
|
| 191 |
+
|
| 192 |
+
print("\n✅ language_detector.py UPDATED for markdown formatting")
|
| 193 |
+
print(" AI will now output code in ```python blocks")
|
| 194 |
+
print(" This enables: Code boxes, Copy button, Syntax highlighting")
|