"""
🧠 Agent 2 (Interpretation) - GPT-4 EDITION
-------------------------------------------------
This version of the interpretation agent is specifically designed to work
with OpenAI's GPT-4. It replaces Google Gemini dependencies with OpenAI API
calls.

Requirements:
- An 'openai_manager_instance' must be passed during initialization.
- This manager is expected to hold an initialized 'client'. The client is
  called synchronously (``client.chat.completions.create(...)`` is not
  awaited), so it must be a synchronous ``OpenAI`` client; with an
  ``AsyncOpenAI`` client every call falls into the error path and the agent
  behaves as if GPT-4 were unavailable.
"""

import concurrent.futures
import glob
import json
import os
import re

import pandas as pd

try:
    from rapidfuzz import fuzz, process
except ImportError:
    # Fuzzy matching is skipped when rapidfuzz is missing; regex, substring,
    # profile-jargon and GPT-4 stages keep working.
    fuzz = None
    process = None

# Matches the markdown code fences GPT models like to wrap JSON in.
_CODE_FENCE_RE = re.compile(r"```json|```")

# Placeholder shown for any field that has no value.
_BLANK = "---"


class AgentInterpretation:
    """Interprets dialect utterances using a CSV knowledge base, lab profiles and GPT-4.

    Attributes:
        config: Object exposing ``PROFILES_DIR`` and ``DATASET_DIR``.
        df: Concatenation of every CSV in ``DATASET_DIR`` (plus a ``Dialect``
            column holding the source file's base name).
        lookup_list: ``df["Utterance"]`` as strings, position-aligned with ``df``.
        openai_manager: Object holding the OpenAI ``client`` (may be ``None``).
        active_profiles_list: Profiles loaded by
            ``load_all_profiles_simultaneously``.
        lab_profile: Most recently loaded single profile (``{}`` until one loads).
        executor: Persistent thread pool used for concurrent GPT-4 calls.
    """

    DEFAULT_PROFILE = "NSL Lab Trainer.json"
    AI_TIMEOUT_SECONDS = 5.5
    MAX_RESULTS = 3

    def __init__(self, config, openai_manager_instance=None):
        """Load the default profile, start the worker pool and read the datasets.

        Args:
            config: Object exposing ``PROFILES_DIR`` and ``DATASET_DIR``.
            openai_manager_instance: Object with a ``client`` attribute, or
                ``None`` to run without GPT-4.
        """
        self.config = config
        self.df = pd.DataFrame()
        self.lookup_list = []
        self.openai_manager = openai_manager_instance  # Renamed from gemini_manager
        self.active_profiles_list = []
        # Always defined, so get_current_profile_text() works even when the
        # default profile file is missing or unreadable.
        self.lab_profile = {}

        # Load default
        self.load_profile_by_name(self.DEFAULT_PROFILE)
        self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=10)
        print("🧠 Agent 2 (Interpretation - GPT-4) Online: Persistent Pool Ready.")
        if process is None:
            print("⚠️ rapidfuzz is not installed: fuzzy matching is disabled.")
        self.refresh_knowledge_base()

    # ------------------------------------------------------------------------
    # HELPERS
    # ------------------------------------------------------------------------
    def _call_gpt4(self, system_prompt, user_prompt, model="gpt-4", temperature=0.7):
        """Send one system+user exchange to the chat completions API.

        Args:
            system_prompt: Content of the ``system`` message.
            user_prompt: Content of the ``user`` message.
            model: Model name passed to the API.
            temperature: Sampling temperature passed to the API.

        Returns:
            The text of the first choice, or ``None`` when no client is
            configured or the call fails (the error is printed).
        """
        client = getattr(self.openai_manager, "client", None)
        if client is None:
            return None
        try:
            response = client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt},
                ],
                temperature=temperature,
            )
            return response.choices[0].message.content
        except Exception as e:  # boundary with an external service
            print(f"❌ GPT-4 Error: {e}")
            return None

    @staticmethod
    def _parse_json_reply(reply):
        """Strip markdown code fences from a model reply and decode it as JSON.

        Raises:
            ValueError: If the reply is empty/``None`` or is not valid JSON
                (``json.JSONDecodeError`` is a ``ValueError``).
        """
        if not reply:
            raise ValueError("empty model reply")
        return json.loads(_CODE_FENCE_RE.sub("", reply).strip())

    @staticmethod
    def _cell(row, column, default=_BLANK):
        """Return ``row[column]``, or ``default`` when it is absent, None or NaN."""
        value = row.get(column, default)
        if value is None or value is pd.NA:
            return default
        if isinstance(value, float) and value != value:  # NaN from an empty CSV cell
            return default
        return value

    @staticmethod
    def _pick(item, keys, default):
        """Return the first truthy value among ``keys`` in ``item``, else ``default``."""
        for key in keys:
            value = item.get(key)
            if value:
                return value
        return default

    # ------------------------------------------------------------------------
    # CORE PROFILE & DATA METHODS
    # ------------------------------------------------------------------------
    def get_available_profiles(self):
        """Return the file names of every ``*.json`` profile in ``PROFILES_DIR``."""
        pattern = os.path.join(self.config.PROFILES_DIR, "*.json")
        return [os.path.basename(path) for path in glob.glob(pattern)]

    def load_profile_by_name(self, filename):
        """Load one profile file and make it the current ``lab_profile``.

        Returns:
            The parsed profile, or ``{}`` when the file cannot be read or
            parsed (in which case the current profile is left unchanged).
        """
        path = os.path.join(self.config.PROFILES_DIR, filename)
        try:
            with open(path, "r", encoding="utf-8") as f:
                profile = json.load(f)
        except (OSError, ValueError) as err:
            print(f"⚠️ Could not load profile '{filename}': {err}")
            return {}
        self.lab_profile = profile
        return profile

    def save_specific_profile(self, filename, json_str):
        """Validate ``json_str`` and write it to ``PROFILES_DIR/filename``.

        A ``.json`` suffix is appended when missing.

        Returns:
            A human-readable status string starting with ✅ or ❌.
        """
        if not filename.endswith(".json"):
            filename += ".json"
        path = os.path.join(self.config.PROFILES_DIR, filename)
        try:
            # Parse BEFORE opening the file: opening in "w" mode truncates it,
            # so invalid JSON must never get that far.
            payload = json.loads(json_str)
            with open(path, "w", encoding="utf-8") as f:
                json.dump(payload, f, indent=2)
            return "✅ Saved locally (HF Sync pending)"
        except Exception as e:
            return f"❌ Error: {e}"

    def get_current_profile_text(self):
        """Return the current profile pretty-printed as JSON text."""
        return json.dumps(self.lab_profile, indent=2)

    def load_all_profiles_simultaneously(self):
        """Replace ``active_profiles_list`` with every readable profile on disk.

        Files that cannot be read, are not valid JSON, or do not contain a
        JSON object are skipped.

        Returns:
            A status string listing the loaded profile names.
        """
        self.active_profiles_list = []
        names = []
        for f_path in glob.glob(os.path.join(self.config.PROFILES_DIR, "*.json")):
            try:
                with open(f_path, "r", encoding="utf-8") as f:
                    data = json.load(f)
            except (OSError, ValueError) as err:
                print(f"⚠️ Skipping profile '{f_path}': {err}")
                continue
            # Validate before appending: later code calls .get() on every
            # active profile.
            if not isinstance(data, dict):
                print(f"⚠️ Skipping profile '{f_path}': not a JSON object")
                continue
            self.active_profiles_list.append(data)
            names.append(str(data.get("lab_name", os.path.basename(f_path))))
        return f"✅ Loaded {len(names)} profiles simultaneously: {', '.join(names)}"

    def refresh_knowledge_base(self):
        """Reload every CSV in ``DATASET_DIR`` into ``df`` and rebuild ``lookup_list``.

        Each file's base name (without extension) is stored in a ``Dialect``
        column. Unreadable files are skipped with a printed warning.
        """
        frames = []
        for csv_path in glob.glob(os.path.join(self.config.DATASET_DIR, "*.csv")):
            try:
                frame = pd.read_csv(csv_path, encoding="utf-8-sig", on_bad_lines="skip")
            except Exception as err:  # best effort: one bad file must not stop the rest
                print(f"⚠️ Skipping dataset '{csv_path}': {err}")
                continue
            frame["Dialect"] = os.path.splitext(os.path.basename(csv_path))[0]
            frames.append(frame)

        # Reset df as well, so it can never disagree with lookup_list.
        self.df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

        if "Utterance" in self.df.columns:
            # Keep one entry per row (position-aligned with df); blank out NaN
            # so the fuzzy matcher only ever sees strings.
            self.lookup_list = [
                "" if pd.isna(utterance) else str(utterance)
                for utterance in self.df["Utterance"]
            ]
        else:
            self.lookup_list = []

    def normalize_keys(self, data_list):
        """Map model-produced interpretation dicts onto the UI's fixed key set.

        Accepts a list of dicts or a single dict; entries that are not dicts
        are dropped. Both capitalised and lower-case key spellings are
        recognised, including the ``"Pragmatics"`` key requested in the
        analysis prompt.

        Returns:
            A list of dicts with keys ``Dialect``, ``Clarification``, ``Tone``,
            ``Context``, ``Pragmatic Analysis`` and ``Source``.
        """
        if isinstance(data_list, dict):
            data_list = [data_list]

        cleaned_list = []
        for item in data_list or []:
            if not isinstance(item, dict):
                continue
            cleaned_list.append({
                "Dialect": self._pick(item, ("Dialect", "dialect"), "Unknown"),
                "Clarification": self._pick(item, ("Clarification", "clarification"), _BLANK),
                "Tone": self._pick(item, ("Tone", "tone"), _BLANK),
                "Context": self._pick(item, ("Context", "context"), _BLANK),
                "Pragmatic Analysis": self._pick(
                    item,
                    ("Pragmatic Analysis", "Pragmatics", "pragmatics", "Pragmatic_Analysis"),
                    _BLANK,
                ),
                "Source": item.get("Source", "✨ GPT-4 Generated"),
            })
        return cleaned_list

    # ------------------------------------------------------------------------
    # AI GENERATION METHODS (GPT-4)
    # ------------------------------------------------------------------------
    def generate_mission(self, context="General"):
        """Generate a random, realistic roleplay scenario for ``context`` using GPT-4.

        Returns:
            A dict with keys ``text`` and ``emoji``. A fixed default mission is
            returned when GPT-4 is unavailable or its reply is unusable.
        """
        fallback = {"text": "Describe your morning routine.", "emoji": "☀️"}
        system_prompt = "You are an expert scenario generator for linguistic roleplay."
        user_prompt = (
            "Generate a short, realistic roleplay scenario for a user to speak a dialect phrase.\n"
            "Constraints:\n"
            f"1. Context: {context}\n"
            "2. Target Audience: Speakers of non-standard English dialects (e.g., Nigerian, Indian).\n"
            "3. Length: 1 sentence.\n"
            "4. Goal: Provoke a natural reaction (complaint, greeting, warning, negotiation).\n"
            "5. Output Format: JSON with keys 'text' and 'emoji'.\n"
            "Examples:\n"
            '- Context: Traffic -> {"text": "A bus driver cut you off. Yell at him.", "emoji": "🚌"}\n'
            '- Context: Market -> {"text": "The fish is too expensive. Ask for a discount.", "emoji": "🐟"}\n'
        )
        try:
            mission = self._parse_json_reply(self._call_gpt4(system_prompt, user_prompt))
        except ValueError:
            return fallback
        if not isinstance(mission, dict) or not mission.get("text"):
            return fallback
        mission.setdefault("emoji", fallback["emoji"])
        return mission

    def generate_unknown_analysis(self, text):
        """Ask GPT-4 for three interpretations of an utterance with no database match.

        Returns:
            ``[]`` when no OpenAI manager is configured; otherwise a list of
            normalized interpretation dicts, or a single "Analysis Failed"
            placeholder when the reply is missing or unusable.
        """
        if not self.openai_manager:
            return []

        jargon_keys = set()
        for profile in self.active_profiles_list:
            jargon = profile.get("jargon", {}) if isinstance(profile, dict) else {}
            if isinstance(jargon, dict):
                jargon_keys.update(jargon)
        # Sorted so the prompt is the same from run to run.
        key_sample = sorted(jargon_keys, key=str)[:50]

        system_prompt = (
            "You are an expert linguist specializing in Global English Dialects and Pragmatics."
        )
        user_prompt = (
            f'Analyze utterance: "{text}"\n'
            f"Context/Jargon Keys Available: {key_sample}\n"
            "Task: Provide 3 distinct interpretations (Casual, Formal, or Cultural).\n"
            "CRITICAL INSTRUCTION: Treat the input as valid, meaningful Dialectal English. "
            'Do NOT label it as "incorrect".\n'
            "Output Strictly JSON array:\n"
            '[ { "Dialect": "General", "Clarification": "...", "Tone": "...", '
            '"Context": "...", "Pragmatics": "..." } ]\n'
        )
        failure = [{
            "Dialect": "Unknown",
            "Clarification": "Analysis Failed",
            "Tone": _BLANK,
            "Context": _BLANK,
            "Pragmatic Analysis": "Error",
        }]
        try:
            data = self._parse_json_reply(self._call_gpt4(system_prompt, user_prompt))
        except ValueError:
            return failure
        return self.normalize_keys(data) or failure

    def adapt_with_ai(self, full_text, db_row):
        """Adapt a database entry's meaning to the user's full sentence via GPT-4.

        Args:
            full_text: What the user actually said.
            db_row: Knowledge-base row (anything with ``.get``) holding
                ``Utterance``, ``Clarification`` and ``Pragmatic_Analysis``.

        Returns:
            ``(clarification, pragmatics)``. The row's own values are returned
            when GPT-4 is unavailable or its reply is unusable.
        """
        fallback = (
            self._cell(db_row, "Clarification"),
            self._cell(db_row, "Pragmatic_Analysis"),
        )
        if not self.openai_manager:
            return fallback

        system_prompt = "You are a dialect adaptation engine."
        user_prompt = (
            f'Ref Term: "{self._cell(db_row, "Utterance", "")}" = "{fallback[0]}"\n'
            f'User said: "{full_text}"\n'
            "Task: Adapt meaning to full sentence. Treat as valid dialect.\n"
            'Output JSON: { "clarification": "...", "pragmatics": "..." }\n'
        )
        # Using GPT-4o-mini or GPT-3.5-turbo here if speed is needed, otherwise default to GPT-4
        reply = self._call_gpt4(system_prompt, user_prompt, model="gpt-4")
        if not reply:
            return fallback

        # The model may surround the object with prose; keep the outermost {...}.
        found = re.search(r"\{.*\}", reply, re.DOTALL)
        if not found:
            return fallback
        try:
            data = json.loads(found.group(0))
        except ValueError:
            return fallback
        if not isinstance(data, dict):
            return fallback
        return (
            data.get("clarification", fallback[0]),
            data.get("pragmatics", "AI Adapted Analysis"),
        )

    # ------------------------------------------------------------------------
    # DETECTION PIPELINE
    # ------------------------------------------------------------------------
    def _match_database(self, clean_text):
        """Return (results, seen_positions) for regex and exact-substring database hits."""
        results = []
        seen_positions = set()
        if self.df.empty:
            return results, seen_positions

        # Positions (not index labels) are recorded so they line up with the
        # positions returned by the fuzzy matcher.
        for position, (_, row) in enumerate(self.df.iterrows()):
            match_type = None

            # An empty CSV cell is NaN; it must not be stringified to "nan"
            # and then used as a regex.
            pattern = str(self._cell(row, "Syntax_Pattern", ""))
            if len(pattern) > 2:
                try:
                    if re.search(pattern, clean_text, re.IGNORECASE):
                        match_type = "Regex_Match"
                except re.error:
                    pass  # an invalid stored pattern just means "no regex match"

            if not match_type:
                utterance = str(self._cell(row, "Utterance", "")).strip().lower()
                if len(utterance) > 3 and utterance in clean_text:
                    match_type = "Exact_Substring"

            if match_type:
                seen_positions.add(position)
                results.append({
                    "Source": f"💎 Database ({match_type})",
                    "Dialect": self._cell(row, "Dialect"),
                    "Clarification": self._cell(row, "Clarification"),
                    "Tone": self._cell(row, "Tone_Category"),
                    "Context": self._cell(row, "Linguistic_Context"),
                    "Pragmatic Analysis": self._cell(row, "Pragmatic_Analysis"),
                })
        return results, seen_positions

    def _match_fuzzy(self, clean_text, threshold, seen_positions):
        """Return fuzzy-match candidates (best first) not already in ``seen_positions``."""
        if process is None or not self.lookup_list:
            return []

        candidates = []
        matches = process.extract(
            clean_text, self.lookup_list, scorer=fuzz.token_set_ratio, limit=5
        )
        for _matched, score, position in matches:
            if score < threshold or position in seen_positions or position >= len(self.df):
                continue
            seen_positions.add(position)
            candidates.append({
                "row": self.df.iloc[position],
                "match_len": score,
                "type": f"Fuzzy ({score:.0f}%)",
            })
        candidates.sort(key=lambda cand: cand["match_len"], reverse=True)
        return candidates

    def _match_profiles(self, clean_text):
        """Return one result per active-profile jargon term found in ``clean_text``."""
        results = []
        for profile in self.active_profiles_list:
            if not isinstance(profile, dict):
                continue
            jargon = profile.get("jargon", {})
            if not isinstance(jargon, dict):
                continue
            for term, definition in jargon.items():
                # An empty term is a substring of everything; skip it.
                if not isinstance(term, str) or not term.strip():
                    continue
                if term.lower() in clean_text:
                    results.append({
                        "Source": f"📜 Profile Rule ({term})",
                        "Dialect": profile.get("lab_name", "Profile"),
                        "Clarification": definition,
                        "Tone": "Detected Jargon",
                        "Context": f"Found in {profile.get('lab_name')} Profile",
                        "Pragmatic Analysis": "Direct Profile Match",
                    })
        return results

    def detect_and_analyze(self, text, threshold=60):
        """Interpret ``text`` and return at most three result dicts.

        Stages, in priority order:
          1. Database regex / exact-substring matches.
          2. Profile jargon matches.
          3. Fuzzy database matches (score >= ``threshold``) adapted by GPT-4.
          4. A free-form GPT-4 analysis, used only if fewer than three
             results exist.

        Stages 3 and 4 run concurrently and are given ``AI_TIMEOUT_SECONDS``
        in total. They are skipped entirely when stages 1-2 already fill the
        result list, since their output would be truncated away.

        Args:
            text: The utterance to interpret.
            threshold: Minimum fuzzy score (0-100) for a database candidate.

        Returns:
            A list of up to three dicts with keys ``Source``, ``Dialect``,
            ``Clarification``, ``Tone``, ``Context`` and ``Pragmatic Analysis``.
            Never empty: a "System Busy" placeholder is returned when nothing
            else is available.
        """
        clean_text = text.lower().strip()

        # 1. Regex & Exact Match
        final_results, seen_positions = self._match_database(clean_text)
        # 2. Fuzzy Matching (candidates only; they need AI adaptation)
        candidates = self._match_fuzzy(clean_text, threshold, seen_positions)
        # 3. Profile Jargon Matching
        final_results.extend(self._match_profiles(clean_text))

        if len(final_results) >= self.MAX_RESULTS:
            return final_results[:self.MAX_RESULTS]

        # 4. Concurrent AI Analysis (GPT-4)
        fallback_future = self.executor.submit(self.generate_unknown_analysis, text)
        db_futures = {
            self.executor.submit(self.adapt_with_ai, text, cand["row"]): cand
            for cand in candidates[:self.MAX_RESULTS]
        }
        done, not_done = concurrent.futures.wait(
            [*db_futures, fallback_future],
            timeout=self.AI_TIMEOUT_SECONDS,
            return_when=concurrent.futures.ALL_COMPLETED,
        )
        for pending in not_done:
            pending.cancel()  # only has an effect on work that has not started yet

        for future, cand in db_futures.items():
            if future not in done:
                continue
            try:
                clar, prag = future.result()
            except Exception as err:  # a failed adaptation only drops that candidate
                print(f"⚠️ AI adaptation failed: {err}")
                continue
            final_results.append({
                "Source": f"💎 DB + AI ({cand['type']})",
                "Dialect": self._cell(cand["row"], "Dialect"),
                "Clarification": clar,
                "Tone": self._cell(cand["row"], "Tone_Category"),
                "Context": self._cell(cand["row"], "Linguistic_Context"),
                "Pragmatic Analysis": prag,
            })

        if len(final_results) < self.MAX_RESULTS and fallback_future in done:
            try:
                final_results.extend(self.normalize_keys(fallback_future.result()))
            except Exception as err:
                print(f"⚠️ AI fallback analysis failed: {err}")

        if not final_results:
            final_results.append({
                "Source": "⚠️ AI Timeout",
                "Dialect": _BLANK,
                "Clarification": "System Busy",
                "Tone": _BLANK,
                "Context": _BLANK,
                "Pragmatic Analysis": _BLANK,
            })
        return final_results[:self.MAX_RESULTS]

    def get_rich_suggestions(self, text, dialect):
        """Ask GPT-4 for three alternative interpretations of ``text`` in ``dialect``.

        Returns:
            The decoded JSON list (a single JSON object is wrapped in a list),
            or ``[]`` when GPT-4 is unavailable or the reply is unusable.
        """
        if not self.openai_manager:
            return []
        system_prompt = "You are a linguistics assistant."
        user_prompt = (
            f'interpret "{text}" ({dialect}). Output 3 JSON options: '
            '[{ "clarification": "", "tone": "", "context": "", "pragmatics": "" }]'
        )
        try:
            options = self._parse_json_reply(self._call_gpt4(system_prompt, user_prompt))
        except ValueError:
            return []
        if isinstance(options, dict):
            return [options]
        return options if isinstance(options, list) else []

    def generate_syntax_pattern(self, utterance):
        """Return a regex for ``utterance``: GPT-4's suggestion if valid, else a literal match.

        The fallback is the lower-cased utterance, regex-escaped and wrapped
        in ``\\b`` word boundaries. It is used when no manager is configured,
        the call fails, or the reply is empty or does not compile.
        """
        safe = r"\b" + re.escape(utterance.lower()) + r"\b"
        if not self.openai_manager:
            return safe

        reply = self._call_gpt4(
            "You are a regex expert.",
            f"Create a python Regex for: '{utterance}'. Return ONLY regex string. No code blocks.",
        )
        if not reply:
            return safe
        pat = reply.replace("`", "").strip()
        if not pat:
            return safe
        try:
            re.compile(pat)
        except re.error:
            return safe
        return pat