Spaces:

toecm
/

PureVersation

Running

App Files Files Community

PureVersation / src /brain_agent_gpt4.py

toecm

Create brain_agent_gpt4.py

fc99505 verified 4 months ago

raw

history blame contribute delete

14.4 kB

	"""
	🧠 Agent 2 (Interpretation) - GPT-4 EDITION
	-------------------------------------------------
	This version of the AgentBrain is specifically designed to work with OpenAI's GPT-4.
	It replaces Google Gemini dependencies with OpenAI API calls.

	Requirements:
	- An 'openai_manager_instance' must be passed during initialization.
	- This manager is expected to hold an initialized synchronous 'client' (OpenAI).
	  The calls in this module are not awaited, so an AsyncOpenAI client will not work.
	"""

	import os
	import glob
	import pandas as pd
	import json
	import re
	import concurrent.futures
	from rapidfuzz import process, fuzz

	class AgentInterpretation:
	    """Interpret dialect utterances with a CSV knowledge base, profile jargon and GPT-4.

	    The agent combines three sources, in this order of precedence:

	    1. Rows of the CSV datasets found in ``config.DATASET_DIR`` (regex or
	       substring hits, then fuzzy hits refined by GPT-4).
	    2. ``jargon`` dictionaries of the profiles held in ``active_profiles_list``.
	    3. A free-form GPT-4 analysis used to fill up the result list.

	    ``config`` must expose ``PROFILES_DIR`` and ``DATASET_DIR``. The optional
	    ``openai_manager_instance`` must expose a ``client`` attribute whose
	    ``chat.completions.create`` is called synchronously.
	    """

	    def __init__(self, config, openai_manager_instance=None):
	        """Load the default profile, start the worker pool and read the datasets."""
	        self.config = config
	        self.df = pd.DataFrame()
	        self.lookup_list = []
	        self.openai_manager = openai_manager_instance # Renamed from gemini_manager
	        self.active_profiles_list = []
	        # Always defined, so get_current_profile_text() works even when the
	        # default profile file is missing or unreadable.
	        self.lab_profile = {}

	        # Load default
	        self.load_profile_by_name("NSL Lab Trainer.json")

	        self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=10)
	        print("🧠 Agent 2 (Interpretation - GPT-4) Online: Persistent Pool Ready.")
	        self.refresh_knowledge_base()

	    # ------------------------------------------------------------------------
	    # HELPERS: GPT-4 API Call Wrapper & response clean-up
	    # ------------------------------------------------------------------------
	    def _call_gpt4(self, system_prompt, user_prompt, model="gpt-4"):
	        """Send one system + user message pair and return the reply text.

	        Returns ``None`` when no manager/client is configured or when the
	        call raises; the error is printed rather than propagated.
	        """
	        client = getattr(self.openai_manager, "client", None) if self.openai_manager else None
	        if client is None:
	            return None

	        try:
	            response = client.chat.completions.create(
	                model=model,
	                messages=[
	                    {"role": "system", "content": system_prompt},
	                    {"role": "user", "content": user_prompt}
	                ],
	                temperature=0.7
	            )
	            return response.choices[0].message.content
	        except Exception as e:
	            # Boundary with a remote service: report and degrade to "no answer".
	            print(f"❌ GPT-4 Error: {e}")
	            return None

	    @staticmethod
	    def _strip_code_fences(text):
	        """Remove Markdown code-fence markers (``` and ```json) from a reply."""
	        return re.sub(r"```(?:json)?", "", text, flags=re.IGNORECASE).strip()

	    # ------------------------------------------------------------------------
	    # CORE PROFILE & DATA METHODS
	    # ------------------------------------------------------------------------

	    def get_available_profiles(self):
	        """Return the file names of all ``*.json`` files in ``PROFILES_DIR``."""
	        files = glob.glob(os.path.join(self.config.PROFILES_DIR, "*.json"))
	        return [os.path.basename(f) for f in files]

	    def load_profile_by_name(self, filename):
	        """Load ``filename`` from ``PROFILES_DIR`` into ``self.lab_profile``.

	        Returns the loaded profile, or ``{}`` when the file cannot be read or
	        parsed (in which case ``self.lab_profile`` is left unchanged).
	        """
	        path = os.path.join(self.config.PROFILES_DIR, filename)
	        try:
	            with open(path, 'r', encoding='utf-8') as f:
	                self.lab_profile = json.load(f)
	        except (OSError, ValueError) as e:
	            # ValueError covers both invalid JSON and undecodable bytes.
	            print(f"⚠️ Could not load profile '{filename}': {e}")
	            return {}
	        return self.lab_profile

	    def save_specific_profile(self, filename, json_str):
	        """Validate ``json_str`` and write it, pretty-printed, to ``PROFILES_DIR``.

	        ``.json`` is appended to ``filename`` when missing. Returns a status
	        message starting with ✅ on success or ❌ on failure.
	        """
	        if not filename.endswith(".json"):
	            filename += ".json"
	        path = os.path.join(self.config.PROFILES_DIR, filename)
	        try:
	            # Parse BEFORE opening the file: opening with "w" truncates it, so
	            # invalid input must be rejected first to keep the existing profile.
	            profile = json.loads(json_str)
	            with open(path, "w", encoding="utf-8") as f:
	                json.dump(profile, f, indent=2)
	            return "✅ Saved locally (HF Sync pending)"
	        except (OSError, ValueError, TypeError) as e:
	            return f"❌ Error: {e}"

	    def get_current_profile_text(self):
	        """Return the currently loaded profile as indented JSON text."""
	        return json.dumps(getattr(self, "lab_profile", {}), indent=2)

	    def load_all_profiles_simultaneously(self):
	        """Replace ``active_profiles_list`` with every readable profile on disk.

	        Files that cannot be read, are not valid JSON, or do not contain a
	        JSON object are skipped. Returns a human-readable summary string.
	        """
	        files = sorted(glob.glob(os.path.join(self.config.PROFILES_DIR, "*.json")))
	        names = []
	        self.active_profiles_list = []

	        for f_path in files:
	            try:
	                with open(f_path, 'r', encoding='utf-8') as f:
	                    data = json.load(f)
	            except (OSError, ValueError) as e:
	                print(f"⚠️ Skipping profile '{os.path.basename(f_path)}': {e}")
	                continue
	            # Later code calls .get("jargon") on each profile, so only JSON
	            # objects may enter the active list.
	            if not isinstance(data, dict):
	                print(f"⚠️ Skipping profile '{os.path.basename(f_path)}': not a JSON object")
	                continue
	            self.active_profiles_list.append(data)
	            names.append(str(data.get("lab_name", os.path.basename(f_path))))
	        return f"✅ Loaded {len(names)} profiles simultaneously: {', '.join(names)}"

	    def refresh_knowledge_base(self):
	        """Reload every ``*.csv`` in ``DATASET_DIR`` into ``self.df``.

	        Each row gets a ``Dialect`` column derived from its file name, and
	        ``self.lookup_list`` is rebuilt from the ``Utterance`` column so that
	        list positions match ``self.df`` row positions. When nothing can be
	        loaded both are reset to empty.
	        """
	        all_files = sorted(glob.glob(os.path.join(self.config.DATASET_DIR, "*.csv")))
	        df_list = []
	        for filename in all_files:
	            try:
	                temp_df = pd.read_csv(filename, encoding='utf-8-sig', on_bad_lines='skip')
	            except (OSError, ValueError) as e:
	                # pandas parser/empty-data errors derive from ValueError.
	                print(f"⚠️ Skipping dataset '{os.path.basename(filename)}': {e}")
	                continue
	            temp_df["Dialect"] = os.path.basename(filename).replace(".csv", "")
	            df_list.append(temp_df)

	        # Reset the frame too, otherwise stale rows would outlive their files
	        # while lookup_list is already empty.
	        self.df = pd.concat(df_list, ignore_index=True) if df_list else pd.DataFrame()
	        if "Utterance" in self.df.columns:
	            self.lookup_list = self.df["Utterance"].tolist()
	        else:
	            self.lookup_list = []

	    def normalize_keys(self, data_list):
	        """Map model output dicts onto the result schema used by this class.

	        Accepts a list of dicts or a single dict; anything else yields ``[]``
	        and non-dict list items are skipped. Capitalised and lower-case key
	        variants are both honoured, including the ``"Pragmatics"`` key that
	        the analysis prompt asks the model to produce.
	        """
	        if isinstance(data_list, dict):
	            data_list = [data_list]
	        if not isinstance(data_list, (list, tuple)):
	            return []

	        cleaned_list = []
	        for item in data_list:
	            if not isinstance(item, dict):
	                continue
	            cleaned_list.append({
	                "Dialect": item.get("Dialect") or item.get("dialect", "Unknown"),
	                "Clarification": item.get("Clarification") or item.get("clarification", "---"),
	                "Tone": item.get("Tone") or item.get("tone", "---"),
	                "Context": item.get("Context") or item.get("context", "---"),
	                "Pragmatic Analysis": (
	                    item.get("Pragmatic Analysis")
	                    or item.get("Pragmatics")
	                    or item.get("pragmatics", "---")
	                ),
	                "Source": item.get("Source", "✨ GPT-4 Generated")
	            })
	        return cleaned_list

	    # ------------------------------------------------------------------------
	    # AI GENERATION METHODS (GPT-4)
	    # ------------------------------------------------------------------------

	    def generate_mission(self, context="General"):
	        """Ask GPT-4 for a one-sentence roleplay scenario for ``context``.

	        Returns a dict with a ``text`` key (and normally ``emoji``). A fixed
	        default scenario is returned when there is no reply or the reply is
	        not a JSON object containing ``text``.
	        """
	        default_mission = {"text": "Describe your morning routine.", "emoji": "☀️"}
	        system_prompt = "You are an expert scenario generator for linguistic roleplay."
	        user_prompt = f"""
	Generate a short, realistic roleplay scenario for a user to speak a dialect phrase.

	Constraints:
	1. Context: {context}
	2. Target Audience: Speakers of non-standard English dialects (e.g., Nigerian, Indian).
	3. Length: 1 sentence.
	4. Goal: Provoke a natural reaction (complaint, greeting, warning, negotiation).
	5. Output Format: JSON with keys 'text' and 'emoji'.

	Examples:
	- Context: Traffic -> {{"text": "A bus driver cut you off. Yell at him.", "emoji": "🚌"}}
	- Context: Market -> {{"text": "The fish is too expensive. Ask for a discount.", "emoji": "🐟"}}
	"""

	        response_text = self._call_gpt4(system_prompt, user_prompt)
	        if not response_text:
	            return default_mission
	        try:
	            # Clean up JSON (OpenAI sometimes adds markdown)
	            mission = json.loads(self._strip_code_fences(response_text))
	        except ValueError:
	            return default_mission
	        if isinstance(mission, dict) and "text" in mission:
	            return mission
	        return default_mission

	    def generate_unknown_analysis(self, text):
	        """Ask GPT-4 for interpretations of an utterance with no database hit.

	        Returns ``[]`` when no manager is configured, the normalised list of
	        interpretations on success, or a single "Analysis Failed" entry when
	        there is no reply or it is not a JSON array/object.
	        """
	        if not self.openai_manager:
	            return []
	        failure = [{"Dialect": "Unknown", "Clarification": "Analysis Failed", "Tone": "---", "Context": "---", "Pragmatic Analysis": "Error"}]

	        all_jargon_keys = []
	        for p in self.active_profiles_list:
	            all_jargon_keys.extend((p.get("jargon") or {}).keys())

	        system_prompt = "You are an expert linguist specializing in Global English Dialects and Pragmatics."
	        # sorted() keeps the 50-key sample stable from one call to the next.
	        jargon_sample = sorted(set(all_jargon_keys))[:50]
	        user_prompt = f"""
	Analyze utterance: "{text}"
	Context/Jargon Keys Available: {jargon_sample}

	Task: Provide 3 distinct interpretations (Casual, Formal, or Cultural).
	CRITICAL INSTRUCTION: Treat the input as valid, meaningful Dialectal English. Do NOT label it as "incorrect".

	Output Strictly JSON array: [ {{ "Dialect": "General", "Clarification": "...", "Tone": "...", "Context": "...", "Pragmatics": "..." }} ]
	"""
	        response_text = self._call_gpt4(system_prompt, user_prompt) # Uses standard GPT-4
	        if not response_text:
	            return failure
	        try:
	            data = json.loads(self._strip_code_fences(response_text))
	        except ValueError:
	            return failure
	        if not isinstance(data, (list, dict)):
	            return failure
	        return self.normalize_keys(data)

	    def adapt_with_ai(self, full_text, db_row):
	        """Adapt a database entry's meaning to the full sentence the user said.

	        ``db_row`` is a mapping (dict or pandas Series) with ``Utterance`` and
	        ``Clarification``; ``Pragmatic_Analysis`` is optional. Returns a
	        ``(clarification, pragmatics)`` tuple, falling back to the stored
	        values when no manager is configured or the reply is unusable.
	        """
	        # .get() for the optional column, matching how detect_and_analyze reads it.
	        stored = (db_row["Clarification"], db_row.get("Pragmatic_Analysis", "---"))
	        if not self.openai_manager:
	            return stored

	        ref_term = db_row['Utterance']
	        ref_meaning = db_row['Clarification']
	        system_prompt = "You are a dialect adaptation engine."
	        user_prompt = f"""
	Ref Term: "{ref_term}" = "{ref_meaning}"
	User said: "{full_text}"
	Task: Adapt meaning to full sentence. Treat as valid dialect.
	Output JSON: {{ "clarification": "...", "pragmatics": "..." }}
	"""
	        # Using GPT-4o-mini or GPT-3.5-turbo here if speed is needed, otherwise default to GPT-4
	        response_text = self._call_gpt4(system_prompt, user_prompt, model="gpt-4")
	        if not response_text:
	            return stored

	        # Take the outermost {...} span so surrounding chatter is ignored.
	        found = re.search(r"\{.*\}", response_text, re.DOTALL)
	        if not found:
	            return stored
	        try:
	            data = json.loads(found.group(0))
	        except ValueError:
	            return stored
	        if not isinstance(data, dict):
	            return stored
	        return data.get("clarification", stored[0]), data.get("pragmatics", "AI Adapted Analysis")

	    def detect_and_analyze(self, text, threshold=60, ai_timeout=5.5):
	        """Return up to three interpretations of ``text``.

	        Args:
	            text: The utterance to analyse.
	            threshold: Minimum fuzzy score for a dataset row to become a candidate.
	            ai_timeout: Seconds to wait for the GPT-4 workers before giving up on them.

	        Returns:
	            A list of at most three result dicts with the keys ``Source``,
	            ``Dialect``, ``Clarification``, ``Tone``, ``Context`` and
	            ``Pragmatic Analysis``. Database and profile hits come first,
	            then AI-adapted fuzzy hits, then the free-form AI analysis.
	        """
	        clean_text = text.lower().strip()
	        seen_indices = set()
	        immediate_results = []

	        # 1. Regex & Exact Match
	        if not self.df.empty:
	            for index, row in self.df.iterrows():
	                match_type = None

	                raw_pattern = row.get("Syntax_Pattern", "")
	                # An empty CSV cell is NaN; str(NaN) == "nan" would pass the
	                # length check and match any text containing "nan".
	                regex = str(raw_pattern) if pd.notna(raw_pattern) else ""
	                if len(regex) > 2:
	                    try:
	                        if re.search(regex, clean_text, re.IGNORECASE):
	                            match_type = "Regex_Match"
	                    except (re.error, OverflowError):
	                        pass  # malformed pattern in the dataset: treat as no match

	                if not match_type:
	                    raw_utterance = row.get("Utterance")
	                    db_str = str(raw_utterance).strip().lower() if pd.notna(raw_utterance) else ""
	                    if len(db_str) > 3 and db_str in clean_text:
	                        match_type = "Exact_Substring"

	                if match_type:
	                    seen_indices.add(index)
	                    immediate_results.append({
	                        "Source": f"💎 Database ({match_type})", "Dialect": row.get("Dialect", "---"),
	                        "Clarification": row.get("Clarification", "---"), "Tone": row.get("Tone_Category", "---"),
	                        "Context": row.get("Linguistic_Context", "---"), "Pragmatic Analysis": row.get("Pragmatic_Analysis", "---")
	                    })

	        # 2. Profile Jargon Matching
	        for profile in self.active_profiles_list:
	            jargon_dict = profile.get("jargon") or {}
	            for term, definition in jargon_dict.items():
	                # An empty term is a substring of everything; skip it.
	                if term and term.lower() in clean_text:
	                    immediate_results.append({
	                        "Source": f"📜 Profile Rule ({term})", "Dialect": profile.get("lab_name", "Profile"),
	                        "Clarification": definition, "Tone": "Detected Jargon",
	                        "Context": f"Found in {profile.get('lab_name')} Profile", "Pragmatic Analysis": "Direct Profile Match"
	                    })

	        # Only the first three results are returned and immediate hits come
	        # first, so fuzzy matching and the AI calls cannot change the answer.
	        if len(immediate_results) >= 3:
	            return immediate_results[:3]

	        # 3. Fuzzy Matching (candidates for AI adaptation)
	        partial_candidates = []
	        if self.lookup_list:
	            matches = process.extract(clean_text, self.lookup_list, scorer=fuzz.token_set_ratio, limit=5)
	            for _match_str, score, index in matches:
	                if score >= threshold and index not in seen_indices and index < len(self.df):
	                    seen_indices.add(index)
	                    row = self.df.iloc[index]
	                    partial_candidates.append({"row": row, "match_len": score, "type": f"Fuzzy ({score}%)"})

	        final_results = list(immediate_results)
	        partial_candidates.sort(key=lambda x: x["match_len"], reverse=True)
	        top_candidates = partial_candidates[:3]

	        # 4. Concurrent AI Analysis (GPT-4)
	        fallback_future = self.executor.submit(self.generate_unknown_analysis, text)
	        db_futures = {}
	        for cand in top_candidates:
	            f = self.executor.submit(self.adapt_with_ai, text, cand["row"])
	            db_futures[f] = cand

	        done, not_done = concurrent.futures.wait(
	            list(db_futures.keys()) + [fallback_future],
	            timeout=ai_timeout,
	            return_when=concurrent.futures.ALL_COMPLETED,
	        )
	        # Drop work that has not started yet; running calls cannot be cancelled.
	        for pending in not_done:
	            pending.cancel()

	        for f, cand in db_futures.items():
	            if f not in done:
	                continue
	            try:
	                clar, prag = f.result()
	            except Exception as e:
	                # Best effort: one failed adaptation must not lose the others.
	                print(f"⚠️ AI adaptation failed: {e}")
	                continue
	            final_results.append({
	                "Source": f"💎 DB + AI ({cand['type']})", "Dialect": cand["row"].get("Dialect", "---"),
	                "Clarification": clar, "Tone": cand["row"].get("Tone_Category", "---"),
	                "Context": cand["row"].get("Linguistic_Context", "---"), "Pragmatic Analysis": prag
	            })

	        if len(final_results) < 3 and fallback_future in done:
	            try:
	                final_results += self.normalize_keys(fallback_future.result())
	            except Exception as e:
	                print(f"⚠️ AI fallback analysis failed: {e}")

	        if not final_results:
	            final_results.append({"Source": "⚠️ AI Timeout", "Dialect": "---", "Clarification": "System Busy", "Tone": "---", "Context": "---", "Pragmatic Analysis": "---"})

	        return final_results[:3]

	    def get_rich_suggestions(self, text, dialect):
	        """Ask GPT-4 for three interpretation options and return the parsed JSON.

	        Returns ``[]`` when no manager is configured, there is no reply, or
	        the reply is not valid JSON.
	        """
	        if not self.openai_manager:
	            return []
	        system_prompt = "You are a linguistics assistant."
	        user_prompt = f"""interpret "{text}" ({dialect}). Output 3 JSON options: [{{ "clarification": "", "tone": "", "context": "", "pragmatics": "" }}]"""
	        response_text = self._call_gpt4(system_prompt, user_prompt)
	        if not response_text:
	            return []
	        try:
	            return json.loads(self._strip_code_fences(response_text))
	        except ValueError:
	            return []

	    def generate_syntax_pattern(self, utterance):
	        """Return a regex for ``utterance``: GPT-4's if usable, else a literal one.

	        The fallback is the escaped, lower-cased utterance wrapped in word
	        boundaries. It is used when no manager is configured, there is no
	        reply, the reply is empty, or it does not compile.
	        """
	        safe = r"\b" + re.escape(utterance.lower()) + r"\b"
	        if not self.openai_manager:
	            return safe
	        response_text = self._call_gpt4(
	            "You are a regex expert.",
	            f"Create a python Regex for: '{utterance}'. Return ONLY regex string. No code blocks."
	        )
	        if not response_text:
	            return safe
	        pat = response_text.strip().replace("`", "")
	        if not pat:
	            # An empty pattern compiles but matches everything.
	            return safe
	        try:
	            re.compile(pat)
	        except (re.error, OverflowError):
	            return safe
	        return pat