Spaces:
Running
Running
| """ | |
| 🧠 Agent 2 (Interpretation) - GPT-4 EDITION | |
| ------------------------------------------------- | |
| This version of the AgentBrain is specifically designed to work with OpenAI's GPT-4. | |
| It replaces Google Gemini dependencies with OpenAI API calls. | |
| Requirements: | |
| - An 'openai_manager_instance' must be passed during initialization. | |
| - This manager is expected to hold an initialized 'client' (AsyncOpenAI or OpenAI). | |
| """ | |
| import os | |
| import glob | |
| import pandas as pd | |
| import json | |
| import re | |
| import concurrent.futures | |
| from rapidfuzz import process, fuzz | |
class AgentInterpretation:
    """Interpret dialect utterances using a CSV knowledge base, profiles and GPT-4.

    The agent combines three sources of evidence:

    * rows of the CSV knowledge base (regex, substring and fuzzy matches),
    * "jargon" dictionaries found in the loaded lab profiles,
    * GPT-4 completions requested through ``openai_manager.client``.

    ``openai_manager_instance`` is optional: when it is missing, the AI-backed
    methods return their documented fallbacks instead of raising.
    """

    # Profile loaded by the constructor.
    DEFAULT_PROFILE = "NSL Lab Trainer.json"
    # Maximum number of interpretations returned by ``detect_and_analyze``.
    MAX_RESULTS = 3
    # How long ``detect_and_analyze`` waits for the concurrent GPT-4 calls.
    AI_TIMEOUT_SECONDS = 5.5

    def __init__(self, config, openai_manager_instance=None):
        """Create the agent, load the default profile and the knowledge base.

        Args:
            config: Object exposing ``PROFILES_DIR`` and ``DATASET_DIR``.
            openai_manager_instance: Optional object exposing a ``client``
                attribute used for chat completions.
        """
        self.config = config
        self.df = pd.DataFrame()
        self.lookup_list = []
        self.openai_manager = openai_manager_instance  # Renamed from gemini_manager
        self.active_profiles_list = []
        # Always defined, so get_current_profile_text() works even when the
        # default profile file is missing or unreadable.
        self.lab_profile = {}
        # Load default
        self.load_profile_by_name(self.DEFAULT_PROFILE)
        self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=10)
        print("🧠 Agent 2 (Interpretation - GPT-4) Online: Persistent Pool Ready.")
        self.refresh_knowledge_base()

    # ------------------------------------------------------------------------
    # HELPERS
    # ------------------------------------------------------------------------
    @staticmethod
    def _strip_code_fences(text):
        """Remove Markdown code-fence markers from a model reply and trim it."""
        return re.sub(r"```json|```", "", text).strip()

    @staticmethod
    def _cell_text(value):
        """Return a DataFrame cell as text, mapping None/NaN to an empty string.

        Without this, ``str(nan)`` yields the literal ``"nan"``, which would
        then be used as a regex or substring and match unrelated input.
        """
        if value is None:
            return ""
        try:
            if pd.isna(value):
                return ""
        except (TypeError, ValueError):
            # Non-scalar cell: fall through and stringify it.
            pass
        return str(value)

    def _call_gpt4(self, system_prompt, user_prompt, model="gpt-4", temperature=0.7):
        """Send one system + user message pair and return the reply text.

        The call is made synchronously and the reply is read directly from the
        returned object.

        Args:
            system_prompt: Content of the ``system`` message.
            user_prompt: Content of the ``user`` message.
            model: Model name forwarded to the API.
            temperature: Sampling temperature forwarded to the API.

        Returns:
            The content of the first choice, or ``None`` when no client is
            available or the request fails (the error is printed).
        """
        if not self.openai_manager:
            return None
        client = getattr(self.openai_manager, "client", None)
        if client is None:
            return None
        try:
            response = client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt},
                ],
                temperature=temperature,
            )
            return response.choices[0].message.content
        except Exception as e:
            print(f"❌ GPT-4 Error: {e}")
            return None

    # ------------------------------------------------------------------------
    # CORE PROFILE & DATA METHODS
    # ------------------------------------------------------------------------
    def get_available_profiles(self):
        """Return the file names of all ``*.json`` files in ``PROFILES_DIR``."""
        files = glob.glob(os.path.join(self.config.PROFILES_DIR, "*.json"))
        return [os.path.basename(f) for f in files]

    def load_profile_by_name(self, filename):
        """Load a profile from ``PROFILES_DIR`` and make it the current one.

        Returns:
            The parsed profile, or ``{}`` when the file cannot be read or
            parsed; in that case the current profile is left unchanged.
        """
        path = os.path.join(self.config.PROFILES_DIR, filename)
        try:
            with open(path, "r", encoding="utf-8") as f:
                profile = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers both invalid JSON and undecodable bytes.
            print(f"⚠️ Could not load profile '{filename}': {e}")
            return {}
        self.lab_profile = profile
        return profile

    def save_specific_profile(self, filename, json_str):
        """Validate ``json_str`` and write it to ``PROFILES_DIR/filename``.

        ``.json`` is appended to ``filename`` when missing.

        Returns:
            A status message; errors are reported in the message, not raised.
        """
        if not filename.endswith(".json"):
            filename += ".json"
        path = os.path.join(self.config.PROFILES_DIR, filename)
        try:
            # Parse BEFORE opening the file: opening in "w" mode truncates it,
            # so invalid input must be rejected first to keep the old profile.
            payload = json.loads(json_str)
            with open(path, "w", encoding="utf-8") as f:
                json.dump(payload, f, indent=2)
            return "✅ Saved locally (HF Sync pending)"
        except Exception as e:
            return f"❌ Error: {e}"

    def get_current_profile_text(self):
        """Return the current profile serialized as indented JSON text."""
        return json.dumps(self.lab_profile, indent=2)

    def load_all_profiles_simultaneously(self):
        """Load every readable profile in ``PROFILES_DIR`` into the active list.

        Files that cannot be read, are not valid JSON, or do not contain a JSON
        object are skipped (a warning is printed).

        Returns:
            A status message with the number and names of loaded profiles.
        """
        files = glob.glob(os.path.join(self.config.PROFILES_DIR, "*.json"))
        loaded = []
        names = []
        for f_path in files:
            try:
                with open(f_path, "r", encoding="utf-8") as f:
                    data = json.load(f)
            except (OSError, ValueError) as e:
                print(f"⚠️ Skipping profile '{os.path.basename(f_path)}': {e}")
                continue
            if not isinstance(data, dict):
                # Later code calls profile.get(...), so only objects are usable.
                print(f"⚠️ Skipping profile '{os.path.basename(f_path)}': not a JSON object")
                continue
            loaded.append(data)
            names.append(str(data.get("lab_name", os.path.basename(f_path))))
        self.active_profiles_list = loaded
        return f"✅ Loaded {len(loaded)} profiles simultaneously: {', '.join(names)}"

    def refresh_knowledge_base(self):
        """Reload every ``*.csv`` in ``DATASET_DIR`` into ``self.df``.

        Each row gets a ``Dialect`` column holding the source file's base name.
        ``self.lookup_list`` mirrors the ``Utterance`` column by position and
        is empty when that column (or any data) is missing.
        """
        frames = []
        for filename in glob.glob(os.path.join(self.config.DATASET_DIR, "*.csv")):
            try:
                temp_df = pd.read_csv(filename, encoding="utf-8-sig", on_bad_lines="skip")
            except (OSError, ValueError) as e:
                # pandas parser/empty-data errors derive from ValueError.
                print(f"⚠️ Skipping dataset '{os.path.basename(filename)}': {e}")
                continue
            temp_df["Dialect"] = os.path.splitext(os.path.basename(filename))[0]
            frames.append(temp_df)

        if frames:
            self.df = pd.concat(frames, ignore_index=True)
        else:
            # Reset so a previously loaded frame does not linger as stale data.
            self.df = pd.DataFrame()

        if "Utterance" in self.df.columns:
            self.lookup_list = self.df["Utterance"].tolist()
        else:
            self.lookup_list = []

    def normalize_keys(self, data_list):
        """Map model-produced dictionaries onto the result schema.

        Accepts the capitalised and lowercase spellings of each field,
        including ``"Pragmatics"`` (the key requested by the analysis prompt).
        A single dictionary is treated as a one-element list; items that are
        not dictionaries are ignored.

        Returns:
            A list of dictionaries with the keys ``Dialect``, ``Clarification``,
            ``Tone``, ``Context``, ``Pragmatic Analysis`` and ``Source``.
        """
        if isinstance(data_list, dict):
            data_list = [data_list]

        field_aliases = (
            ("Dialect", ("Dialect", "dialect"), "Unknown"),
            ("Clarification", ("Clarification", "clarification"), "---"),
            ("Tone", ("Tone", "tone"), "---"),
            ("Context", ("Context", "context"), "---"),
            (
                "Pragmatic Analysis",
                ("Pragmatic Analysis", "Pragmatics", "pragmatics", "Pragmatic_Analysis"),
                "---",
            ),
        )

        cleaned_list = []
        for item in data_list:
            if not isinstance(item, dict):
                continue
            new_item = {}
            for target, aliases, default in field_aliases:
                # First non-empty alias wins; otherwise use the placeholder.
                new_item[target] = next(
                    (item[alias] for alias in aliases if item.get(alias)), default
                )
            new_item["Source"] = item.get("Source", "✨ GPT-4 Generated")
            cleaned_list.append(new_item)
        return cleaned_list

    # ------------------------------------------------------------------------
    # AI GENERATION METHODS (GPT-4)
    # ------------------------------------------------------------------------
    def generate_mission(self, context="General"):
        """Generate a one-sentence roleplay scenario for ``context`` via GPT-4.

        Returns:
            The dictionary parsed from the model's JSON reply (requested keys:
            ``text`` and ``emoji``). A fixed default mission is returned when
            the call fails or the reply is not a JSON object with ``text``.
        """
        system_prompt = "You are an expert scenario generator for linguistic roleplay."
        user_prompt = f"""
        Generate a short, realistic roleplay scenario for a user to speak a dialect phrase.
        Constraints:
        1. Context: {context}
        2. Target Audience: Speakers of non-standard English dialects (e.g., Nigerian, Indian).
        3. Length: 1 sentence.
        4. Goal: Provoke a natural reaction (complaint, greeting, warning, negotiation).
        5. Output Format: JSON with keys 'text' and 'emoji'.
        Examples:
        - Context: Traffic -> {{"text": "A bus driver cut you off. Yell at him.", "emoji": "🚌"}}
        - Context: Market -> {{"text": "The fish is too expensive. Ask for a discount.", "emoji": "🐟"}}
        """
        try:
            response_text = self._call_gpt4(system_prompt, user_prompt)
            if not response_text:
                raise ValueError("No response from GPT")
            # Clean up JSON (OpenAI sometimes adds markdown)
            mission = json.loads(self._strip_code_fences(response_text))
            if not isinstance(mission, dict) or "text" not in mission:
                raise ValueError("Mission reply is not an object with a 'text' key")
            return mission
        except Exception:
            return {"text": "Describe your morning routine.", "emoji": "☀️"}

    def generate_unknown_analysis(self, text):
        """Ask GPT-4 for interpretations of an utterance.

        The jargon keys of the active profiles (at most 50) are passed to the
        model as context.

        Returns:
            ``[]`` when no OpenAI manager is configured; otherwise a list of
            normalized result dictionaries, or a single "Analysis Failed"
            entry when the call or the JSON parsing fails.
        """
        if not self.openai_manager:
            return []
        all_jargon_keys = []
        for p in self.active_profiles_list:
            all_jargon_keys.extend(list(p.get("jargon", {}).keys()))
        system_prompt = "You are an expert linguist specializing in Global English Dialects and Pragmatics."
        user_prompt = f"""
        Analyze utterance: "{text}"
        Context/Jargon Keys Available: {list(set(all_jargon_keys))[:50]}
        Task: Provide 3 distinct interpretations (Casual, Formal, or Cultural).
        CRITICAL INSTRUCTION: Treat the input as valid, meaningful Dialectal English. Do NOT label it as "incorrect".
        Output Strictly JSON array: [ {{ "Dialect": "General", "Clarification": "...", "Tone": "...", "Context": "...", "Pragmatics": "..." }} ]
        """
        try:
            response_text = self._call_gpt4(system_prompt, user_prompt)  # Uses standard GPT-4
            if not response_text:
                raise ValueError("No response from GPT")
            data = json.loads(self._strip_code_fences(response_text))
            results = self.normalize_keys(data)
            if not results:
                raise ValueError("Reply contained no usable interpretations")
            return results
        except Exception:
            return [{"Dialect": "Unknown", "Clarification": "Analysis Failed", "Tone": "---", "Context": "---", "Pragmatic Analysis": "Error"}]

    def adapt_with_ai(self, full_text, db_row):
        """Adapt a knowledge-base entry to the user's full sentence via GPT-4.

        Args:
            full_text: The sentence the user said.
            db_row: Mapping-like row (``.get`` is used) with ``Utterance``,
                ``Clarification`` and optionally ``Pragmatic_Analysis``.

        Returns:
            ``(clarification, pragmatics)``. The stored values (or ``"---"``
            when a field is absent) are returned when no manager is configured
            or the model reply cannot be used.
        """
        stored = (
            db_row.get("Clarification", "---"),
            db_row.get("Pragmatic_Analysis", "---"),
        )
        if not self.openai_manager:
            return stored
        system_prompt = "You are a dialect adaptation engine."
        user_prompt = f"""
        Ref Term: "{db_row.get('Utterance', '')}" = "{stored[0]}"
        User said: "{full_text}"
        Task: Adapt meaning to full sentence. Treat as valid dialect.
        Output JSON: {{ "clarification": "...", "pragmatics": "..." }}
        """
        # Using GPT-4o-mini or GPT-3.5-turbo here if speed is needed, otherwise default to GPT-4
        response_text = self._call_gpt4(system_prompt, user_prompt, model="gpt-4")
        if not response_text:
            return stored
        json_span = re.search(r"\{.*\}", response_text, re.DOTALL)
        if not json_span:
            return stored
        try:
            data = json.loads(json_span.group(0))
        except ValueError:
            return stored
        if not isinstance(data, dict):
            return stored
        return data.get("clarification", stored[0]), data.get("pragmatics", "AI Adapted Analysis")

    def _match_database(self, clean_text):
        """Return ``(seen_indices, results)`` for regex/substring row matches."""
        seen_indices = set()
        results = []
        if self.df.empty:
            return seen_indices, results

        for index, row in self.df.iterrows():
            match_type = None
            pattern = self._cell_text(row.get("Syntax_Pattern"))
            if len(pattern) > 2:
                try:
                    if re.search(pattern, clean_text, re.IGNORECASE):
                        match_type = "Regex_Match"
                except re.error:
                    # Invalid stored pattern: fall back to the substring test.
                    pass
            if match_type is None:
                db_str = self._cell_text(row.get("Utterance")).strip().lower()
                if len(db_str) > 3 and db_str in clean_text:
                    match_type = "Exact_Substring"
            if match_type is None:
                continue
            seen_indices.add(index)
            results.append({
                "Source": f"💎 Database ({match_type})", "Dialect": row.get("Dialect", "Unknown"),
                "Clarification": row.get("Clarification", "---"), "Tone": row.get("Tone_Category", "---"),
                "Context": row.get("Linguistic_Context", "---"), "Pragmatic Analysis": row.get("Pragmatic_Analysis", "---")
            })
        return seen_indices, results

    def _match_profile_jargon(self, clean_text):
        """Return one result per active-profile jargon term found in the text."""
        results = []
        for profile in self.active_profiles_list:
            jargon_dict = profile.get("jargon", {})
            if not isinstance(jargon_dict, dict):
                continue
            for term, definition in jargon_dict.items():
                # An empty term is a substring of every text; skip it.
                if not isinstance(term, str) or not term.strip():
                    continue
                if term.lower() in clean_text:
                    results.append({
                        "Source": f"📜 Profile Rule ({term})", "Dialect": profile.get("lab_name", "Profile"),
                        "Clarification": definition, "Tone": "Detected Jargon",
                        "Context": f"Found in {profile.get('lab_name')} Profile", "Pragmatic Analysis": "Direct Profile Match"
                    })
        return results

    def _fuzzy_candidates(self, clean_text, threshold, seen_indices):
        """Return fuzzy-matched rows not already seen, best score first."""
        if not self.lookup_list:
            self.lookup_list = []
            return []
        matches = process.extract(clean_text, self.lookup_list, scorer=fuzz.token_set_ratio, limit=5)
        candidates = []
        for _match_str, score, index in matches:
            if score >= threshold and index not in seen_indices and index < len(self.df):
                seen_indices.add(index)
                candidates.append({"row": self.df.iloc[index], "match_len": score, "type": f"Fuzzy ({score}%)"})
        candidates.sort(key=lambda c: c["match_len"], reverse=True)
        return candidates

    def detect_and_analyze(self, text, threshold=60):
        """Interpret ``text`` and return at most three result dictionaries.

        Results are collected in this order: database regex/substring matches,
        profile jargon matches, fuzzy database matches adapted by GPT-4, and
        finally a free-form GPT-4 analysis when fewer than three results exist.

        Args:
            text: The utterance to interpret.
            threshold: Minimum fuzzy score for a row to become a candidate.
        """
        clean_text = text.lower().strip()

        # 1. Regex & Exact Match, 3. Profile Jargon Matching
        seen_indices, immediate_results = self._match_database(clean_text)
        immediate_results.extend(self._match_profile_jargon(clean_text))

        # The output is truncated to MAX_RESULTS and immediate matches always
        # come first, so further (slow, billable) AI work cannot change it.
        if len(immediate_results) >= self.MAX_RESULTS:
            return immediate_results[:self.MAX_RESULTS]

        # 2. Fuzzy Matching
        top_candidates = self._fuzzy_candidates(clean_text, threshold, seen_indices)[:3]

        # 4. Concurrent AI Analysis (GPT-4)
        fallback_future = self.executor.submit(self.generate_unknown_analysis, text)
        db_futures = {}
        for cand in top_candidates:
            db_futures[self.executor.submit(self.adapt_with_ai, text, cand["row"])] = cand

        done, not_done = concurrent.futures.wait(
            list(db_futures.keys()) + [fallback_future],
            timeout=self.AI_TIMEOUT_SECONDS,
            return_when=concurrent.futures.ALL_COMPLETED,
        )
        # Drop work that has not started yet; its result would be ignored.
        for pending in not_done:
            pending.cancel()

        final_results = list(immediate_results)
        for future, cand in db_futures.items():
            if future not in done:
                continue
            try:
                clar, prag = future.result()
            except Exception as e:
                print(f"⚠️ AI adaptation failed: {e}")
                continue
            row = cand["row"]
            final_results.append({
                "Source": f"💎 DB + AI ({cand['type']})", "Dialect": row.get("Dialect", "Unknown"),
                "Clarification": clar, "Tone": row.get("Tone_Category", "---"),
                "Context": row.get("Linguistic_Context", "---"), "Pragmatic Analysis": prag
            })

        if len(final_results) < self.MAX_RESULTS and fallback_future in done:
            try:
                final_results += self.normalize_keys(fallback_future.result())
            except Exception as e:
                print(f"⚠️ AI fallback analysis failed: {e}")

        if not final_results:
            final_results.append({"Source": "⚠️ AI Timeout", "Dialect": "---", "Clarification": "System Busy", "Tone": "---", "Context": "---", "Pragmatic Analysis": "---"})
        return final_results[:self.MAX_RESULTS]

    def get_rich_suggestions(self, text, dialect):
        """Ask GPT-4 for three interpretation options for ``text``.

        Returns:
            The parsed JSON list (a single object is wrapped in a list), or
            ``[]`` when no manager is configured or the reply is unusable.
        """
        if not self.openai_manager:
            return []
        system_prompt = "You are a linguistics assistant."
        user_prompt = f"""interpret "{text}" ({dialect}). Output 3 JSON options: [{{ "clarification": "", "tone": "", "context": "", "pragmatics": "" }}]"""
        response_text = self._call_gpt4(system_prompt, user_prompt)
        if not response_text:
            return []
        try:
            suggestions = json.loads(self._strip_code_fences(response_text))
        except ValueError:
            return []
        if isinstance(suggestions, dict):
            return [suggestions]
        return suggestions if isinstance(suggestions, list) else []

    def generate_syntax_pattern(self, utterance):
        """Return a regex for ``utterance``, preferring a GPT-4 suggestion.

        The fallback is the lowercased, escaped utterance between word
        boundaries; it is used when no manager is configured, the model gives
        no reply, or the suggested pattern is empty or does not compile.
        """
        safe = r"\b" + re.escape(utterance.lower()) + r"\b"
        if not self.openai_manager:
            return safe
        response_text = self._call_gpt4(
            "You are a regex expert.",
            f"Create a python Regex for: '{utterance}'. Return ONLY regex string. No code blocks."
        )
        if not response_text:
            return safe
        pat = response_text.strip().replace("`", "")
        if not pat:
            return safe
        try:
            re.compile(pat)
        except re.error:
            return safe
        return pat