# PureVersation / src / brain_agent_gpt4.py
# toecm's picture
# Create brain_agent_gpt4.py
# fc99505 verified
"""
🧠 Agent 2 (Interpretation) - GPT-4 EDITION
-------------------------------------------------
This version of the AgentBrain is specifically designed to work with OpenAI's GPT-4.
It replaces Google Gemini dependencies with OpenAI API calls.
Requirements:
- An 'openai_manager_instance' must be passed during initialization.
- This manager is expected to hold an initialized 'client'. Calls are made synchronously
  (the result is not awaited), so it must be a blocking OpenAI client, not AsyncOpenAI.
"""
import os
import glob
import pandas as pd
import json
import re
import concurrent.futures
from rapidfuzz import process, fuzz
class AgentInterpretation:
    """Interpret dialect utterances using local CSV data, JSON profiles and GPT-4.

    The agent combines three sources, in this order of preference:

    1. rows of the CSV knowledge base (regex, substring and fuzzy matches),
    2. jargon dictionaries of the currently active JSON profiles,
    3. GPT-4 completions requested through ``openai_manager.client``.

    ``config`` must expose ``PROFILES_DIR`` and ``DATASET_DIR``.
    """

    # Canonical result key -> (accepted lower-cased aliases, default value).
    # "pragmatics" is listed because the analysis prompt asks the model for a
    # "Pragmatics" key, which has to end up under "Pragmatic Analysis".
    _RESULT_FIELDS = (
        ("Dialect", ("dialect",), "Unknown"),
        ("Clarification", ("clarification",), "---"),
        ("Tone", ("tone",), "---"),
        ("Context", ("context",), "---"),
        (
            "Pragmatic Analysis",
            ("pragmatic analysis", "pragmatic_analysis", "pragmatics"),
            "---",
        ),
    )

    def __init__(self, config, openai_manager_instance=None):
        """Load the default profile, start the worker pool and read the CSV data.

        Args:
            config: Object exposing ``PROFILES_DIR`` and ``DATASET_DIR``.
            openai_manager_instance: Optional object holding a ``client``
                attribute used for chat completions. Without it every AI
                method falls back to its non-AI result.
        """
        self.config = config
        self.df = pd.DataFrame()
        self.lookup_list = []
        self.openai_manager = openai_manager_instance  # Renamed from gemini_manager
        self.active_profiles_list = []
        # Defined up front so get_current_profile_text() works even when the
        # default profile file is missing or unreadable.
        self.lab_profile = {}
        # Load default
        self.load_profile_by_name("NSL Lab Trainer.json")
        self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=10)
        print("🧠 Agent 2 (Interpretation - GPT-4) Online: Persistent Pool Ready.")
        self.refresh_knowledge_base()

    # ------------------------------------------------------------------------
    # HELPER: GPT-4 API Call Wrapper
    # ------------------------------------------------------------------------
    def _call_gpt4(self, system_prompt, user_prompt, model="gpt-4", temperature=0.7):
        """Send one system+user exchange and return the reply text.

        Args:
            system_prompt: Content of the system message.
            user_prompt: Content of the user message.
            model: Model name passed to the completion call.
            temperature: Sampling temperature passed to the completion call.

        Returns:
            The content of the first choice, or ``None`` when no client is
            configured or the call fails (the error is printed).
        """
        if not self.openai_manager:
            return None
        client = getattr(self.openai_manager, "client", None)
        if client is None:
            return None
        try:
            response = client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt},
                ],
                temperature=temperature,
            )
            return response.choices[0].message.content
        except Exception as e:
            print(f"❌ GPT-4 Error: {e}")
            return None

    @staticmethod
    def _strip_code_fences(text):
        """Remove markdown code fences that models often wrap around JSON."""
        return re.sub(r"```json|```", "", text).strip()

    # ------------------------------------------------------------------------
    # CORE PROFILE & DATA METHODS
    # ------------------------------------------------------------------------
    def get_available_profiles(self):
        """Return the file names of all ``*.json`` profiles, sorted by name."""
        files = glob.glob(os.path.join(self.config.PROFILES_DIR, "*.json"))
        return sorted(os.path.basename(f) for f in files)

    def load_profile_by_name(self, filename):
        """Load one profile file into ``self.lab_profile``.

        Returns:
            The parsed profile, or ``{}`` when the file cannot be read or
            parsed (``self.lab_profile`` is left untouched in that case).
        """
        path = os.path.join(self.config.PROFILES_DIR, filename)
        try:
            with open(path, 'r', encoding='utf-8') as f:
                self.lab_profile = json.load(f)
        except (OSError, ValueError):
            # ValueError covers both malformed JSON and undecodable bytes.
            return {}
        return self.lab_profile

    def save_specific_profile(self, filename, json_str):
        """Validate ``json_str`` and write it to ``filename`` in the profile dir.

        Returns:
            A status string starting with ✅ on success or ❌ on failure.
        """
        if not filename.endswith(".json"):
            filename += ".json"
        path = os.path.join(self.config.PROFILES_DIR, filename)
        try:
            # Parse BEFORE opening: opening with "w" truncates the file, so an
            # invalid payload must be rejected while the old profile is intact.
            payload = json.loads(json_str)
            with open(path, "w", encoding="utf-8") as f:
                json.dump(payload, f, indent=2)
            return "✅ Saved locally (HF Sync pending)"
        except Exception as e:
            return f"❌ Error: {e}"

    def get_current_profile_text(self):
        """Return the current profile as pretty-printed JSON text."""
        return json.dumps(self.lab_profile, indent=2)

    def load_all_profiles_simultaneously(self):
        """Replace ``self.active_profiles_list`` with every readable profile.

        Files that cannot be read, are not valid JSON, or whose top level is
        not an object are skipped, because the matching code calls ``.get``
        on every active profile.

        Returns:
            A status string listing how many and which profiles were loaded.
        """
        files = sorted(glob.glob(os.path.join(self.config.PROFILES_DIR, "*.json")))
        loaded = []
        names = []
        for f_path in files:
            try:
                with open(f_path, 'r', encoding='utf-8') as f:
                    data = json.load(f)
            except (OSError, ValueError):
                continue
            if not isinstance(data, dict):
                continue
            loaded.append(data)
            names.append(str(data.get("lab_name", os.path.basename(f_path))))
        self.active_profiles_list = loaded
        return f"✅ Loaded {len(loaded)} profiles simultaneously: {', '.join(names)}"

    def refresh_knowledge_base(self):
        """Reload every ``*.csv`` in ``DATASET_DIR`` into ``self.df``.

        Each file contributes its rows plus a ``Dialect`` column derived from
        the file name. Files without an ``Utterance`` column are skipped since
        all matching relies on that column. When nothing loads, both
        ``self.df`` and ``self.lookup_list`` are reset so they stay in sync.
        """
        all_files = sorted(glob.glob(os.path.join(self.config.DATASET_DIR, "*.csv")))
        df_list = []
        for filename in all_files:
            try:
                temp_df = pd.read_csv(filename, encoding='utf-8-sig', on_bad_lines='skip')
            except Exception as e:
                # Best effort: one broken dataset must not block the others.
                print(f"⚠️ Skipping dataset {os.path.basename(filename)}: {e}")
                continue
            if "Utterance" not in temp_df.columns:
                print(f"⚠️ Skipping dataset {os.path.basename(filename)}: no 'Utterance' column")
                continue
            temp_df["Dialect"] = os.path.basename(filename).replace(".csv", "")
            df_list.append(temp_df)
        if df_list:
            self.df = pd.concat(df_list, ignore_index=True)
            self.lookup_list = self.df["Utterance"].tolist()
        else:
            self.df = pd.DataFrame()
            self.lookup_list = []

    def normalize_keys(self, data_list):
        """Map model output dicts onto the canonical result keys.

        Key lookup is case-insensitive and accepts the aliases declared in
        ``_RESULT_FIELDS``. A single dict is treated as a one-element list;
        ``None``/empty input yields ``[]``; non-dict items are skipped.

        Returns:
            A list of dicts with the keys ``Dialect``, ``Clarification``,
            ``Tone``, ``Context``, ``Pragmatic Analysis`` and ``Source``.
        """
        if not data_list:
            return []
        if isinstance(data_list, dict):
            data_list = [data_list]
        cleaned_list = []
        for item in data_list:
            if not isinstance(item, dict):
                continue
            lowered = {str(key).lower(): value for key, value in item.items()}
            new_item = {}
            for canonical, aliases, default in self._RESULT_FIELDS:
                candidates = [item.get(canonical)]
                candidates.extend(lowered.get(alias) for alias in aliases)
                # First non-empty candidate wins; otherwise use the default.
                new_item[canonical] = next((v for v in candidates if v), default)
            new_item["Source"] = item.get("Source", "✨ GPT-4 Generated")
            cleaned_list.append(new_item)
        return cleaned_list

    # ------------------------------------------------------------------------
    # AI GENERATION METHODS (GPT-4)
    # ------------------------------------------------------------------------
    def generate_mission(self, context="General"):
        """Generate a one-sentence roleplay scenario for ``context`` using GPT-4.

        Returns:
            A dict with at least a ``text`` key (the prompt also requests
            ``emoji``). A fixed default scenario is returned when the model is
            unavailable or its reply is not a JSON object containing ``text``.
        """
        default_mission = {"text": "Describe your morning routine.", "emoji": "☀️"}
        system_prompt = "You are an expert scenario generator for linguistic roleplay."
        user_prompt = f"""
        Generate a short, realistic roleplay scenario for a user to speak a dialect phrase.
        Constraints:
        1. Context: {context}
        2. Target Audience: Speakers of non-standard English dialects (e.g., Nigerian, Indian).
        3. Length: 1 sentence.
        4. Goal: Provoke a natural reaction (complaint, greeting, warning, negotiation).
        5. Output Format: JSON with keys 'text' and 'emoji'.
        Examples:
        - Context: Traffic -> {{"text": "A bus driver cut you off. Yell at him.", "emoji": "🚌"}}
        - Context: Market -> {{"text": "The fish is too expensive. Ask for a discount.", "emoji": "🐟"}}
        """
        response_text = self._call_gpt4(system_prompt, user_prompt)
        if not response_text:
            return default_mission
        try:
            # Clean up JSON (OpenAI sometimes adds markdown)
            mission = json.loads(self._strip_code_fences(response_text))
        except ValueError:
            return default_mission
        if isinstance(mission, dict) and "text" in mission:
            return mission
        return default_mission

    def generate_unknown_analysis(self, text):
        """Ask GPT-4 for three interpretations of an utterance.

        Returns:
            ``[]`` when no OpenAI manager is configured; otherwise the
            normalized interpretations, or a single "Analysis Failed" entry
            when the call fails or the reply cannot be parsed.
        """
        if not self.openai_manager:
            return []
        jargon_keys = set()
        for p in self.active_profiles_list:
            jargon = p.get("jargon") if isinstance(p, dict) else None
            if isinstance(jargon, dict):
                jargon_keys.update(jargon)
        system_prompt = "You are an expert linguist specializing in Global English Dialects and Pragmatics."
        user_prompt = f"""
        Analyze utterance: "{text}"
        Context/Jargon Keys Available: {sorted(jargon_keys)[:50]}
        Task: Provide 3 distinct interpretations (Casual, Formal, or Cultural).
        CRITICAL INSTRUCTION: Treat the input as valid, meaningful Dialectal English. Do NOT label it as "incorrect".
        Output Strictly JSON array: [ {{ "Dialect": "General", "Clarification": "...", "Tone": "...", "Context": "...", "Pragmatics": "..." }} ]
        """
        failure = [{"Dialect": "Unknown", "Clarification": "Analysis Failed", "Tone": "---", "Context": "---", "Pragmatic Analysis": "Error"}]
        response_text = self._call_gpt4(system_prompt, user_prompt)  # Uses standard GPT-4
        if not response_text:
            return failure
        try:
            data = json.loads(self._strip_code_fences(response_text))
        except ValueError:
            return failure
        if not isinstance(data, (list, dict)):
            return failure
        return self.normalize_keys(data)

    def adapt_with_ai(self, full_text, db_row):
        """Adapt a database entry's meaning to the full sentence the user said.

        Args:
            full_text: The complete user utterance.
            db_row: Mapping-like row (dict or pandas Series) providing
                ``Utterance`` and ``Clarification`` and, optionally,
                ``Pragmatic_Analysis``.

        Returns:
            ``(clarification, pragmatics)``. The database values are returned
            unchanged when AI is unavailable or its reply is unusable.
        """
        # .get keeps this consistent with detect_and_analyze, which also
        # treats Pragmatic_Analysis as an optional column.
        db_values = (db_row["Clarification"], db_row.get("Pragmatic_Analysis", "---"))
        if not self.openai_manager:
            return db_values
        system_prompt = "You are a dialect adaptation engine."
        user_prompt = f"""
        Ref Term: "{db_row['Utterance']}" = "{db_row['Clarification']}"
        User said: "{full_text}"
        Task: Adapt meaning to full sentence. Treat as valid dialect.
        Output JSON: {{ "clarification": "...", "pragmatics": "..." }}
        """
        # Using GPT-4o-mini or GPT-3.5-turbo here if speed is needed, otherwise default to GPT-4
        response_text = self._call_gpt4(system_prompt, user_prompt, model="gpt-4")
        if not response_text:
            return db_values
        found = re.search(r"\{.*\}", response_text, re.DOTALL)
        if not found:
            return db_values
        try:
            data = json.loads(found.group(0))
        except ValueError:
            return db_values
        if not isinstance(data, dict):
            return db_values
        return data.get("clarification", db_values[0]), data.get("pragmatics", "AI Adapted Analysis")

    def _match_database(self, clean_text):
        """Return ``(results, matched_row_indices)`` for regex/substring hits."""
        results = []
        seen_indices = set()
        if self.df.empty:
            return results, seen_indices
        for index, row in self.df.iterrows():
            match_type = None
            raw_pattern = row.get("Syntax_Pattern")
            # An empty CSV cell is NaN; str(NaN) is "nan", which as a regex
            # would match any text containing "nan" (e.g. "banana").
            regex = "" if raw_pattern is None or pd.isna(raw_pattern) else str(raw_pattern)
            if len(regex) > 2:
                try:
                    if re.search(regex, clean_text, re.IGNORECASE):
                        match_type = "Regex_Match"
                except re.error:
                    # Invalid pattern stored in the dataset: ignore it.
                    pass
            if not match_type:
                raw_utterance = row.get("Utterance")
                if raw_utterance is None or pd.isna(raw_utterance):
                    db_str = ""
                else:
                    db_str = str(raw_utterance).strip().lower()
                if len(db_str) > 3 and db_str in clean_text:
                    match_type = "Exact_Substring"
            if match_type:
                seen_indices.add(index)
                results.append({
                    "Source": f"💎 Database ({match_type})", "Dialect": row["Dialect"],
                    "Clarification": row.get("Clarification", "---"), "Tone": row.get("Tone_Category", "---"),
                    "Context": row.get("Linguistic_Context", "---"), "Pragmatic Analysis": row.get("Pragmatic_Analysis", "---")
                })
        return results, seen_indices

    def _match_profile_jargon(self, clean_text):
        """Return one result per active-profile jargon term found in the text."""
        results = []
        for profile in self.active_profiles_list:
            jargon_dict = profile.get("jargon") or {}
            for term, definition in jargon_dict.items():
                # An empty term is a substring of everything; skip it.
                if term and term.lower() in clean_text:
                    results.append({
                        "Source": f"📜 Profile Rule ({term})", "Dialect": profile.get("lab_name", "Profile"),
                        "Clarification": definition, "Tone": "Detected Jargon",
                        "Context": f"Found in {profile.get('lab_name')} Profile", "Pragmatic Analysis": "Direct Profile Match"
                    })
        return results

    def _match_fuzzy(self, clean_text, threshold, seen_indices):
        """Return fuzzy-matched candidate rows not already matched exactly."""
        candidates = []
        if not self.lookup_list:
            return candidates
        matches = process.extract(clean_text, self.lookup_list, scorer=fuzz.token_set_ratio, limit=5)
        for _match_str, score, index in matches:
            if score >= threshold and index not in seen_indices and index < len(self.df):
                seen_indices.add(index)
                row = self.df.iloc[index]
                candidates.append({"row": row, "match_len": score, "type": f"Fuzzy ({score}%)"})
        return candidates

    def detect_and_analyze(self, text, threshold=60, ai_timeout=5.5):
        """Interpret ``text`` and return at most three result dicts.

        Order of results: database regex/substring hits, profile jargon hits,
        AI-adapted fuzzy database hits, then pure GPT-4 interpretations (only
        used while fewer than three results exist).

        Args:
            text: The user utterance.
            threshold: Minimum fuzzy score for a database candidate.
            ai_timeout: Seconds to wait for all AI calls together.

        Returns:
            Up to three dicts with the keys ``Source``, ``Dialect``,
            ``Clarification``, ``Tone``, ``Context``, ``Pragmatic Analysis``.
            A single "AI Timeout" entry is returned when nothing was found.
        """
        text = "" if text is None else str(text)
        clean_text = text.lower().strip()

        # 1. Regex & Exact Match
        immediate_results, seen_indices = self._match_database(clean_text)
        # 3. Profile Jargon Matching (independent of the fuzzy step, so it can
        #    run first without changing the order of the results)
        immediate_results.extend(self._match_profile_jargon(clean_text))

        # Only the first three results are returned, so when direct matches
        # already fill them, AI calls would cost time and money for nothing.
        if len(immediate_results) >= 3:
            return immediate_results[:3]

        # 2. Fuzzy Matching
        partial_candidates = self._match_fuzzy(clean_text, threshold, seen_indices)
        final_results = list(immediate_results)
        partial_candidates.sort(key=lambda x: x["match_len"], reverse=True)
        top_candidates = partial_candidates[:3]

        # 4. Concurrent AI Analysis (GPT-4)
        fallback_future = self.executor.submit(self.generate_unknown_analysis, text)
        db_futures = {}
        for cand in top_candidates:
            db_futures[self.executor.submit(self.adapt_with_ai, text, cand["row"])] = cand
        done, not_done = concurrent.futures.wait(
            [*db_futures, fallback_future],
            timeout=ai_timeout,
            return_when=concurrent.futures.ALL_COMPLETED,
        )
        # Drop work that has not started yet; running calls cannot be stopped.
        for pending in not_done:
            pending.cancel()

        for future, cand in db_futures.items():
            if future not in done:
                continue
            try:
                clar, prag = future.result()
            except Exception as e:
                print(f"⚠️ AI adaptation failed: {e}")
                continue
            row = cand["row"]
            final_results.append({
                "Source": f"💎 DB + AI ({cand['type']})", "Dialect": row["Dialect"],
                "Clarification": clar, "Tone": row.get("Tone_Category", "---"),
                "Context": row.get("Linguistic_Context", "---"), "Pragmatic Analysis": prag
            })

        if len(final_results) < 3 and fallback_future in done:
            try:
                final_results += self.normalize_keys(fallback_future.result())
            except Exception as e:
                print(f"⚠️ AI analysis failed: {e}")

        if not final_results:
            final_results.append({"Source": "⚠️ AI Timeout", "Dialect": "---", "Clarification": "System Busy", "Tone": "---", "Context": "---", "Pragmatic Analysis": "---"})
        return final_results[:3]

    def get_rich_suggestions(self, text, dialect):
        """Ask GPT-4 for three interpretation options of ``text``.

        Returns:
            The parsed JSON reply, or ``[]`` when AI is unavailable or the
            reply is not valid JSON.
        """
        if not self.openai_manager:
            return []
        system_prompt = "You are a linguistics assistant."
        user_prompt = f"""interpret "{text}" ({dialect}). Output 3 JSON options: [{{ "clarification": "", "tone": "", "context": "", "pragmatics": "" }}]"""
        response_text = self._call_gpt4(system_prompt, user_prompt)
        if not response_text:
            return []
        try:
            return json.loads(self._strip_code_fences(response_text))
        except ValueError:
            return []

    def generate_syntax_pattern(self, utterance):
        """Return a regex for ``utterance``.

        The literal, word-bounded pattern is used unless GPT-4 supplies a
        non-empty pattern that compiles.
        """
        safe = r"\b" + re.escape(utterance.lower()) + r"\b"
        if not self.openai_manager:
            return safe
        response_text = self._call_gpt4(
            "You are a regex expert.",
            f"Create a python Regex for: '{utterance}'. Return ONLY regex string. No code blocks."
        )
        if not response_text:
            return safe
        pat = response_text.replace("`", "").strip()
        if not pat:
            # An empty pattern would match everything.
            return safe
        try:
            re.compile(pat)
        except (re.error, RecursionError, OverflowError):
            return safe
        return pat