Spaces:
Running
Running
Update src/brain_agent.py
Browse files- src/brain_agent.py +29 -120
src/brain_agent.py
CHANGED
|
@@ -2,6 +2,7 @@
|
|
| 2 |
🧠 Agent 2 (Interpretation) - Gemini 2.0 EDITION
|
| 3 |
-------------------------------------------------
|
| 4 |
This version of the AgentBrain is specifically designed to work with Google's Gemini 2.0.
|
|
|
|
| 5 |
"""
|
| 6 |
|
| 7 |
import os
|
|
@@ -13,64 +14,29 @@ import concurrent.futures
|
|
| 13 |
from rapidfuzz import process, fuzz
|
| 14 |
|
| 15 |
class AgentInterpretation:
|
| 16 |
-
def __init__(self, config, gemini_manager_instance=None
|
| 17 |
self.config = config
|
| 18 |
self.df = pd.DataFrame()
|
| 19 |
self.lookup_list = []
|
| 20 |
self.gemini_manager = gemini_manager_instance
|
| 21 |
-
self.hf_manager = hf_manager_instance
|
| 22 |
self.active_profiles_list = []
|
| 23 |
-
|
| 24 |
-
# 🟢 FIX: Initialize with a safe default so it ALWAYS exists
|
| 25 |
-
self.lab_profile = {"lab_name": "Default Profile", "jargon": {}}
|
| 26 |
-
|
| 27 |
-
# Persistence for last used profile
|
| 28 |
-
self.last_profile_path = os.path.join(self.config.PROFILES_DIR, "last_used_profile.txt")
|
| 29 |
|
| 30 |
-
# Load
|
| 31 |
-
|
| 32 |
-
print(f"🧠 Loading initial profile: {last_used}")
|
| 33 |
-
self.load_profile_by_name(last_used)
|
| 34 |
|
| 35 |
self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=10)
|
| 36 |
print("🧠 Agent 2 (Interpretation) Online: Persistent Pool Ready.")
|
| 37 |
self.refresh_knowledge_base()
|
| 38 |
|
| 39 |
-
def _get_last_used_profile_name(self):
|
| 40 |
-
try:
|
| 41 |
-
if os.path.exists(self.last_profile_path):
|
| 42 |
-
with open(self.last_profile_path, 'r', encoding='utf-8') as f:
|
| 43 |
-
name = f.read().strip()
|
| 44 |
-
if os.path.exists(os.path.join(self.config.PROFILES_DIR, name)):
|
| 45 |
-
return name
|
| 46 |
-
except: pass
|
| 47 |
-
return "Korean English.json"
|
| 48 |
-
|
| 49 |
-
# 🟢 NEW: Manual Sync Method
|
| 50 |
-
def sync_from_huggingface(self):
|
| 51 |
-
if self.hf_manager:
|
| 52 |
-
print("🔄 Triggering Manual Cloud Sync...")
|
| 53 |
-
self.hf_manager.pull_datasets()
|
| 54 |
-
self.refresh_knowledge_base() # Reload CSVs
|
| 55 |
-
return "✅ Cloud Sync Complete. Lists refreshed."
|
| 56 |
-
return "⚠️ HF Manager not connected."
|
| 57 |
-
|
| 58 |
def get_available_profiles(self):
|
| 59 |
files = glob.glob(os.path.join(self.config.PROFILES_DIR, "*.json"))
|
| 60 |
-
return
|
| 61 |
|
| 62 |
def load_profile_by_name(self, filename):
|
| 63 |
path = os.path.join(self.config.PROFILES_DIR, filename)
|
| 64 |
try:
|
| 65 |
with open(path, 'r', encoding='utf-8') as f:
|
| 66 |
self.lab_profile = json.load(f)
|
| 67 |
-
|
| 68 |
-
# Save persistence
|
| 69 |
-
try:
|
| 70 |
-
with open(self.last_profile_path, 'w', encoding='utf-8') as f_last:
|
| 71 |
-
f_last.write(filename)
|
| 72 |
-
except: pass
|
| 73 |
-
|
| 74 |
return self.lab_profile
|
| 75 |
except: return {}
|
| 76 |
|
|
@@ -78,14 +44,10 @@ class AgentInterpretation:
|
|
| 78 |
if not filename.endswith(".json"): filename += ".json"
|
| 79 |
path = os.path.join(self.config.PROFILES_DIR, filename)
|
| 80 |
try:
|
| 81 |
-
with open(path, "w", encoding="utf-8") as f:
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
self.hf_manager.push_update(path, commit_msg=f"Update Profile: {filename}")
|
| 86 |
-
return "✅ Saved locally & Pushing to HF..."
|
| 87 |
-
else:
|
| 88 |
-
return "⚠️ Saved locally only (HF Manager missing)"
|
| 89 |
except Exception as e: return f"❌ Error: {e}"
|
| 90 |
|
| 91 |
def get_current_profile_text(self):
|
|
@@ -96,6 +58,7 @@ class AgentInterpretation:
|
|
| 96 |
loaded_count = 0
|
| 97 |
names = []
|
| 98 |
self.active_profiles_list = []
|
|
|
|
| 99 |
for f_path in files:
|
| 100 |
try:
|
| 101 |
with open(f_path, 'r', encoding='utf-8') as f:
|
|
@@ -136,20 +99,32 @@ class AgentInterpretation:
|
|
| 136 |
return cleaned_list
|
| 137 |
|
| 138 |
def generate_mission(self, context="General"):
|
|
|
|
|
|
|
|
|
|
| 139 |
prompt = f"""
|
| 140 |
Generate a short, realistic roleplay scenario for a user to speak a dialect phrase.
|
|
|
|
| 141 |
Constraints:
|
| 142 |
1. Context: {context}
|
| 143 |
-
2. Target Audience: Speakers of non-standard English dialects.
|
| 144 |
3. Length: 1 sentence.
|
| 145 |
-
4. Goal: Provoke a natural reaction.
|
| 146 |
5. Output Format: JSON with keys 'text' and 'emoji'.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
"""
|
|
|
|
| 148 |
try:
|
|
|
|
| 149 |
response = self.gemini_manager.generate_content(prompt)
|
|
|
|
| 150 |
clean_json = response.text.replace("```json", "").replace("```", "").strip()
|
| 151 |
return json.loads(clean_json)
|
| 152 |
except:
|
|
|
|
| 153 |
return {"text": "Describe your morning routine.", "emoji": "☀️"}
|
| 154 |
|
| 155 |
def generate_unknown_analysis(self, text):
|
|
@@ -161,8 +136,8 @@ class AgentInterpretation:
|
|
| 161 |
prompt = f"""
|
| 162 |
Analyze utterance: "{text}"
|
| 163 |
Context/Jargon Keys: {list(set(all_jargon_keys))[:50]}
|
| 164 |
-
Task: Provide
|
| 165 |
-
CRITICAL INSTRUCTION: Treat the input as valid, meaningful Dialectal English.
|
| 166 |
Output Strictly JSON: [ {{ "Dialect": "General", "Clarification": "...", "Tone": "...", "Context": "...", "Pragmatics": "..." }} ]
|
| 167 |
"""
|
| 168 |
try:
|
|
@@ -174,7 +149,7 @@ class AgentInterpretation:
|
|
| 174 |
return [{"Dialect": "Unknown", "Clarification": "Analysis Failed", "Tone": "---", "Context": "---", "Pragmatic Analysis": "Error"}]
|
| 175 |
|
| 176 |
def adapt_with_ai(self, full_text, db_row):
|
| 177 |
-
if not self.gemini_manager: return db_row["Clarification"], db_row
|
| 178 |
prompt = f"""
|
| 179 |
Ref Term: "{db_row['Utterance']}" = "{db_row['Clarification']}"
|
| 180 |
User said: "{full_text}"
|
|
@@ -188,7 +163,7 @@ class AgentInterpretation:
|
|
| 188 |
data = json.loads(clean_json.group(0))
|
| 189 |
return data.get("clarification", db_row["Clarification"]), data.get("pragmatics", "AI Adapted Analysis")
|
| 190 |
except: pass
|
| 191 |
-
return db_row["Clarification"], db_row
|
| 192 |
|
| 193 |
def detect_and_analyze(self, text, threshold=60):
|
| 194 |
clean_text = text.lower().strip()
|
|
@@ -287,70 +262,4 @@ class AgentInterpretation:
|
|
| 287 |
pat = self.gemini_manager.generate_fast(f"Regex for: '{utterance}'. Return ONLY regex string.").text.strip().replace("`", "")
|
| 288 |
re.compile(pat)
|
| 289 |
return pat
|
| 290 |
-
except: return safe
|
| 291 |
-
|
| 292 |
-
# ==========================================
|
| 293 |
-
# PhD-GRADE SOCIOLINGUISTIC ANALYSIS
|
| 294 |
-
# ==========================================
|
| 295 |
-
def analyze_dialect_multi(self, text, language_code="en"):
|
| 296 |
-
"""
|
| 297 |
-
PhD-Grade Sociolinguistic Analysis for PureConvo Project.
|
| 298 |
-
Extracts Intent, Register, and Cultural Nuance using structured JSON.
|
| 299 |
-
"""
|
| 300 |
-
# Define the research-grade prompt
|
| 301 |
-
research_prompt = f"""
|
| 302 |
-
Act as an expert Sociolinguist for the PureConvo research project.
|
| 303 |
-
Analyze the following utterance within the context of Intra-English Dialect Interpretation.
|
| 304 |
-
|
| 305 |
-
Utterance: "{text}"
|
| 306 |
-
Primary Language Context: {language_code}
|
| 307 |
-
|
| 308 |
-
Provide a multi-dialectal analysis in a JSON LIST format. For each likely dialect, include:
|
| 309 |
-
1. "dialect": The specific name (e.g., 'Nigerian English', 'AAVE').
|
| 310 |
-
2. "clarification": A 'General English' translation that preserves original intent.
|
| 311 |
-
3. "tone": Categorize as: 'Neutral / Conversational', 'Casual / Slang', 'Formal / Professional', or 'Proverb / Idiom'.
|
| 312 |
-
4. "context": The situational environment where this is most likely spoken.
|
| 313 |
-
5. "pragmatics": A structured string containing:
|
| 314 |
-
- [Force]: (e.g., Phatic, Directive, Expressive)
|
| 315 |
-
- [Deixis]: Implied social hierarchy/distance.
|
| 316 |
-
- [Register]: Numeric scale 1-5.
|
| 317 |
-
- [Nuance]: Specific cultural/linguistic markers.
|
| 318 |
-
|
| 319 |
-
JSON format ONLY. No conversational filler.
|
| 320 |
-
"""
|
| 321 |
-
|
| 322 |
-
try:
|
| 323 |
-
# 🟢 FIX: Using self.gemini_manager instead of self.model
|
| 324 |
-
response = self.gemini_manager.generate_content(research_prompt)
|
| 325 |
-
|
| 326 |
-
# Clean response text (remove markdown blocks if present)
|
| 327 |
-
raw_json = response.text.replace("```json", "").replace("```", "").strip()
|
| 328 |
-
analysis_list = json.loads(raw_json)
|
| 329 |
-
|
| 330 |
-
# 🟢 VALIDATION: Ensure the data matches your UI columns perfectly
|
| 331 |
-
validated_results = []
|
| 332 |
-
for item in analysis_list:
|
| 333 |
-
validated_results.append({
|
| 334 |
-
"Clarification_Source": "✨ PhD-AI (CA-IEDI)", # Added for UI consistency
|
| 335 |
-
"Speaker": "Speaker 1", # Added for UI consistency
|
| 336 |
-
"dialect": item.get("dialect", "Unknown"),
|
| 337 |
-
"clarification": item.get("clarification", "N/A"),
|
| 338 |
-
"tone": item.get("tone", "Neutral / Conversational"),
|
| 339 |
-
"context": item.get("context", "General"),
|
| 340 |
-
"pragmatics": item.get("pragmatics", "No analysis provided.")
|
| 341 |
-
})
|
| 342 |
-
|
| 343 |
-
return validated_results
|
| 344 |
-
|
| 345 |
-
except Exception as e:
|
| 346 |
-
print(f"❌ [BRAIN] Analysis Pipeline Crash: {e}")
|
| 347 |
-
# Return a safe fallback list to prevent UI "Format Errors"
|
| 348 |
-
return [{
|
| 349 |
-
"Clarification_Source": "⚠️ Error",
|
| 350 |
-
"Speaker": "---",
|
| 351 |
-
"dialect": "Detection Failed",
|
| 352 |
-
"clarification": "Error processing analysis",
|
| 353 |
-
"tone": "Neutral",
|
| 354 |
-
"context": "N/A",
|
| 355 |
-
"pragmatics": str(e)
|
| 356 |
-
}]
|
|
|
|
| 2 |
🧠 Agent 2 (Interpretation) - Gemini 2.0 EDITION
|
| 3 |
-------------------------------------------------
|
| 4 |
This version of the AgentBrain is specifically designed to work with Google's Gemini 2.0.
|
| 5 |
+
|
| 6 |
"""
|
| 7 |
|
| 8 |
import os
|
|
|
|
| 14 |
from rapidfuzz import process, fuzz
|
| 15 |
|
| 16 |
class AgentInterpretation:
|
| 17 |
+
def __init__(self, config, gemini_manager_instance=None):
|
| 18 |
self.config = config
|
| 19 |
self.df = pd.DataFrame()
|
| 20 |
self.lookup_list = []
|
| 21 |
self.gemini_manager = gemini_manager_instance
|
|
|
|
| 22 |
self.active_profiles_list = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
+
# Load default
|
| 25 |
+
self.load_profile_by_name("NSL Lab Trainer.json")
|
|
|
|
|
|
|
| 26 |
|
| 27 |
self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=10)
|
| 28 |
print("🧠 Agent 2 (Interpretation) Online: Persistent Pool Ready.")
|
| 29 |
self.refresh_knowledge_base()
|
| 30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
def get_available_profiles(self):
|
| 32 |
files = glob.glob(os.path.join(self.config.PROFILES_DIR, "*.json"))
|
| 33 |
+
return [os.path.basename(f) for f in files]
|
| 34 |
|
| 35 |
def load_profile_by_name(self, filename):
|
| 36 |
path = os.path.join(self.config.PROFILES_DIR, filename)
|
| 37 |
try:
|
| 38 |
with open(path, 'r', encoding='utf-8') as f:
|
| 39 |
self.lab_profile = json.load(f)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
return self.lab_profile
|
| 41 |
except: return {}
|
| 42 |
|
|
|
|
| 44 |
if not filename.endswith(".json"): filename += ".json"
|
| 45 |
path = os.path.join(self.config.PROFILES_DIR, filename)
|
| 46 |
try:
|
| 47 |
+
with open(path, "w", encoding="utf-8") as f: json.dump(json.loads(json_str), f, indent=2)
|
| 48 |
+
# Note: We can't easily push to HF here without circular dependency or passing HF manager.
|
| 49 |
+
# Ideally, the Brain agent shouldn't manage HF syncing, but for now we return success.
|
| 50 |
+
return "✅ Saved locally (HF Sync pending)"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
except Exception as e: return f"❌ Error: {e}"
|
| 52 |
|
| 53 |
def get_current_profile_text(self):
|
|
|
|
| 58 |
loaded_count = 0
|
| 59 |
names = []
|
| 60 |
self.active_profiles_list = []
|
| 61 |
+
|
| 62 |
for f_path in files:
|
| 63 |
try:
|
| 64 |
with open(f_path, 'r', encoding='utf-8') as f:
|
|
|
|
| 99 |
return cleaned_list
|
| 100 |
|
| 101 |
def generate_mission(self, context="General"):
|
| 102 |
+
"""
|
| 103 |
+
Generates a random, realistic scenario based on the context.
|
| 104 |
+
"""
|
| 105 |
prompt = f"""
|
| 106 |
Generate a short, realistic roleplay scenario for a user to speak a dialect phrase.
|
| 107 |
+
|
| 108 |
Constraints:
|
| 109 |
1. Context: {context}
|
| 110 |
+
2. Target Audience: Speakers of non-standard English dialects (e.g., Nigerian, Indian).
|
| 111 |
3. Length: 1 sentence.
|
| 112 |
+
4. Goal: Provoke a natural reaction (complaint, greeting, warning, negotiation).
|
| 113 |
5. Output Format: JSON with keys 'text' and 'emoji'.
|
| 114 |
+
|
| 115 |
+
Examples:
|
| 116 |
+
- Context: Traffic -> {{"text": "A bus driver cut you off. Yell at him.", "emoji": "🚌"}}
|
| 117 |
+
- Context: Market -> {{"text": "The fish is too expensive. Ask for a discount.", "emoji": "🐟"}}
|
| 118 |
"""
|
| 119 |
+
|
| 120 |
try:
|
| 121 |
+
# Call Gemini (assuming self.gemini_manager is set up)
|
| 122 |
response = self.gemini_manager.generate_content(prompt)
|
| 123 |
+
# Clean up JSON (sometimes AI adds ```json ... ```)
|
| 124 |
clean_json = response.text.replace("```json", "").replace("```", "").strip()
|
| 125 |
return json.loads(clean_json)
|
| 126 |
except:
|
| 127 |
+
# Fallback if AI fails
|
| 128 |
return {"text": "Describe your morning routine.", "emoji": "☀️"}
|
| 129 |
|
| 130 |
def generate_unknown_analysis(self, text):
|
|
|
|
| 136 |
prompt = f"""
|
| 137 |
Analyze utterance: "{text}"
|
| 138 |
Context/Jargon Keys: {list(set(all_jargon_keys))[:50]}
|
| 139 |
+
Task: Provide 3 distinct interpretations (Casual, Formal, or Cultural).
|
| 140 |
+
CRITICAL INSTRUCTION: Treat the input as valid, meaningful Dialectal English. Do NOT label it as "incorrect".
|
| 141 |
Output Strictly JSON: [ {{ "Dialect": "General", "Clarification": "...", "Tone": "...", "Context": "...", "Pragmatics": "..." }} ]
|
| 142 |
"""
|
| 143 |
try:
|
|
|
|
| 149 |
return [{"Dialect": "Unknown", "Clarification": "Analysis Failed", "Tone": "---", "Context": "---", "Pragmatic Analysis": "Error"}]
|
| 150 |
|
| 151 |
def adapt_with_ai(self, full_text, db_row):
|
| 152 |
+
if not self.gemini_manager: return db_row["Clarification"], db_row["Pragmatic_Analysis"]
|
| 153 |
prompt = f"""
|
| 154 |
Ref Term: "{db_row['Utterance']}" = "{db_row['Clarification']}"
|
| 155 |
User said: "{full_text}"
|
|
|
|
| 163 |
data = json.loads(clean_json.group(0))
|
| 164 |
return data.get("clarification", db_row["Clarification"]), data.get("pragmatics", "AI Adapted Analysis")
|
| 165 |
except: pass
|
| 166 |
+
return db_row["Clarification"], db_row["Pragmatic_Analysis"]
|
| 167 |
|
| 168 |
def detect_and_analyze(self, text, threshold=60):
|
| 169 |
clean_text = text.lower().strip()
|
|
|
|
| 262 |
pat = self.gemini_manager.generate_fast(f"Regex for: '{utterance}'. Return ONLY regex string.").text.strip().replace("`", "")
|
| 263 |
re.compile(pat)
|
| 264 |
return pat
|
| 265 |
+
except: return safe
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|