AJAY KASU committed on
Commit
911b780
·
1 Parent(s): 5dafb0f

Refactor: Robust JSON extraction and grounded PM commentary for Bytez AI

Browse files
Files changed (2) hide show
  1. ai/ai_reporter.py +42 -19
  2. ai/prompts.py +17 -8
ai/ai_reporter.py CHANGED
@@ -26,7 +26,7 @@ class AIReporter:
26
  self.model_path = "meta-llama/Meta-Llama-3-8B-Instruct"
27
  self.endpoint = f"{self.base_url}/{self.model_path}"
28
 
29
- def _call_bytez(self, messages: list, max_tokens: int = 500, temperature: float = 0.7) -> str:
30
  """
31
  Helper to make the POST request to Bytez.
32
  """
@@ -41,7 +41,9 @@ class AIReporter:
41
  payload = {
42
  "messages": messages,
43
  "max_tokens": max_tokens,
44
- "temperature": temperature
 
 
45
  }
46
 
47
  try:
@@ -64,25 +66,40 @@ class AIReporter:
64
  def parse_intent(self, user_prompt: str) -> list:
65
  """
66
  Uses Bytez AI to map user prompt to a list of exact GICS sectors to exclude.
 
67
  """
68
  logger.info(f"Parsing intent with Bytez for prompt: {user_prompt[:50]}...")
69
 
70
  messages = [
71
  {"role": "system", "content": INTENT_PARSER_SYSTEM_PROMPT},
72
- {"role": "user", "content": f"Parse this prompt for sector exclusions: '{user_prompt}'"}
73
  ]
74
 
75
  try:
76
- content = self._call_bytez(messages, max_tokens=100, temperature=0.0)
 
 
 
 
 
 
77
  if not content:
78
- logger.warning("Empty response from Bytez for Intent Parsing. Returning empty list.")
79
  return []
80
 
81
- # Clean content for JSON extraction
82
  import re
 
83
  match = re.search(r'\[.*\]', content.strip(), re.DOTALL)
84
  if match:
85
- return json.loads(match.group(0))
 
 
 
 
 
 
 
86
  return []
87
 
88
  except Exception as e:
@@ -100,20 +117,26 @@ class AIReporter:
100
  from datetime import datetime
101
  current_date = datetime.now().strftime("%B %d, %Y")
102
 
 
 
 
103
  # Format the user prompt
104
  user_prompt = f"""
105
  Current Date: {current_date}
106
- INSTRUCTION: Start your commentary exactly with the header: "Market Commentary - {current_date}"
107
- """ + ATTRIBUTION_PROMPT_TEMPLATE.format(
108
- excluded_sector=excluded_sector,
109
- total_active_return=attribution_report.total_active_return * 100,
110
- allocation_effect=attribution_report.allocation_effect * 100,
111
- selection_effect=attribution_report.selection_effect * 100,
112
- top_contributors=json.dumps(attribution_report.top_contributors, indent=2),
113
- top_detractors=json.dumps(attribution_report.top_detractors, indent=2),
114
- sector_positioning=json.dumps(attribution_report.sector_exposure, indent=2),
115
- current_date=current_date
116
- )
 
 
 
117
 
118
  if not self.api_key:
119
  return f"AI Commentary Unavailable. (Missing BYTEZ_API_KEY). Current Date: {current_date}"
@@ -129,7 +152,7 @@ INSTRUCTION: Start your commentary exactly with the header: "Market Commentary -
129
  if not commentary:
130
  return "AI Commentary generation timed out or failed. Please try again."
131
 
132
- return commentary
133
 
134
  except Exception as e:
135
  logger.error(f"Failed to generate Bytez report: {e}")
 
26
  self.model_path = "meta-llama/Meta-Llama-3-8B-Instruct"
27
  self.endpoint = f"{self.base_url}/{self.model_path}"
28
 
29
+ def _call_bytez(self, messages: list, max_tokens: int = 500, temperature: float = 0.7, top_p: float = 0.9) -> str:
30
  """
31
  Helper to make the POST request to Bytez.
32
  """
 
41
  payload = {
42
  "messages": messages,
43
  "max_tokens": max_tokens,
44
+ "temperature": temperature,
45
+ "top_p": top_p,
46
+ "response_format": {"type": "json_object"} if temperature < 0.2 else None
47
  }
48
 
49
  try:
 
66
  def parse_intent(self, user_prompt: str) -> list:
67
  """
68
  Uses Bytez AI to map user prompt to a list of exact GICS sectors to exclude.
69
+ Highly deterministic (temp=0.1) with robust regex extraction.
70
  """
71
  logger.info(f"Parsing intent with Bytez for prompt: {user_prompt[:50]}...")
72
 
73
  messages = [
74
  {"role": "system", "content": INTENT_PARSER_SYSTEM_PROMPT},
75
+ {"role": "user", "content": f"Parse: '{user_prompt}'"}
76
  ]
77
 
78
  try:
79
+ # Intent Parser uses a low temperature for determinism; top_p is not overridden here and stays at the _call_bytez default
80
+ content = self._call_bytez(
81
+ messages,
82
+ max_tokens=100,
83
+ temperature=0.1
84
+ )
85
+
86
  if not content:
87
+ logger.warning("Empty response from Bytez for Intent Parsing. Defaulting to [].")
88
  return []
89
 
90
+ # Robust Regex Fallback Extraction
91
  import re
92
+ # Look for the JSON list [ ... ]
93
  match = re.search(r'\[.*\]', content.strip(), re.DOTALL)
94
  if match:
95
+ extracted_json = match.group(0)
96
+ try:
97
+ return json.loads(extracted_json)
98
+ except json.JSONDecodeError as je:
99
+ logger.error(f"JSON Decode Error after extraction: {je}")
100
+ return []
101
+
102
+ logger.warning(f"No JSON list found in response: {content[:100]}...")
103
  return []
104
 
105
  except Exception as e:
 
117
  from datetime import datetime
118
  current_date = datetime.now().strftime("%B %d, %Y")
119
 
120
+ # Grounding check: a total active return of exactly 0 is treated as replication mode (used here as a stand-in for zero tracking error)
121
+ is_replication = attribution_report.total_active_return == 0
122
+
123
  # Format the user prompt
124
  user_prompt = f"""
125
  Current Date: {current_date}
126
+ Portfolio Metadata:
127
+ - Sector Exclusions: {excluded_sector}
128
+ - Alpha (Active Return): {attribution_report.total_active_return * 100:.2f}%
129
+ - Total Tracking Error: {attribution_report.total_active_return * 100:.4f}%
130
+ - Full Replication Mode: {is_replication}
131
+
132
+ ## DATA TABLES:
133
+ **Contributors/Detractors**:
134
+ {json.dumps(attribution_report.top_contributors[:5], indent=2)}
135
+ {json.dumps(attribution_report.top_detractors[:5], indent=2)}
136
+
137
+ **Sector Positioning**:
138
+ {json.dumps(attribution_report.sector_exposure, indent=2)}
139
+ """
140
 
141
  if not self.api_key:
142
  return f"AI Commentary Unavailable. (Missing BYTEZ_API_KEY). Current Date: {current_date}"
 
152
  if not commentary:
153
  return "AI Commentary generation timed out or failed. Please try again."
154
 
155
+ return commentary.strip()
156
 
157
  except Exception as e:
158
  logger.error(f"Failed to generate Bytez report: {e}")
ai/prompts.py CHANGED
@@ -18,24 +18,33 @@ GICS Sectors:
18
 
19
  ## RULES:
20
  1. Return ONLY a valid JSON list of strings from the 11 GICS sectors above.
21
- 2. If the user mentions "tech", map it to "Information Technology".
22
- 3. If the user mentions "banks" or "finance", map it to "Financials".
23
- 4. If the user mentions "healthcare" or "pharma", map it to "Health Care".
24
- 5. If the user doesn't want to exclude any sectors, return [].
25
- 6. Do NOT include any explanations or extra text.
26
 
27
  Example:
28
  User: "no tech or banks"
29
  Output: ["Information Technology", "Financials"]
30
  """
31
 
32
- SYSTEM_PROMPT = """You are a Senior Portfolio Manager at a top-tier Asset Management firm (e.g., Goldman Sachs, BlackRock).
33
- Your goal is to write a concise, professional, and insightful performance commentary for a High Net Worth Application.
 
 
 
 
 
 
 
34
  Your tone should be:
35
  1. Professional and reassuring.
36
- 2. Mathematically precise (cite the numbers).
37
  3. Explanatory (explain 'why' something happened).
 
38
 
 
39
  ## GOLDMAN RULES (STRICT COMPLIANCE)
40
  1. **The Exclusion Rule**: If a stock or sector has "Status": "Excluded", NEVER refer to it as a "Holding". We don't own it. Its negative contribution is a "Missed Opportunity" or "Drag from Benchbark Rally".
41
  2. **The Active Return Rule**: Only call a stock a "Contributor" if its "Active_Contribution" is POSITIVE.
 
18
 
19
  ## RULES:
20
  1. Return ONLY a valid JSON list of strings from the 11 GICS sectors above.
21
+ 2. OUTPUT ONLY VALID JSON. NO MARKDOWN BACKTICKS (```json). NO EXPLANATIONS.
22
+ 3. If the user mentions "tech", map it to "Information Technology".
23
+ 4. If the user mentions "banks" or "finance", map it to "Financials".
24
+ 5. If the user mentions "healthcare" or "pharma", map it to "Health Care".
25
+ 6. If the user doesn't want to exclude any sectors, return [].
26
 
27
  Example:
28
  User: "no tech or banks"
29
  Output: ["Information Technology", "Financials"]
30
  """
31
 
32
+ SYSTEM_PROMPT = """You are a Senior Portfolio Manager at a top-tier Asset Management firm.
33
+ You are analyzing a direct indexing portfolio.
34
+
35
+ ## GROUND RULES:
36
+ 1. **The Tracking Error Rule**: If Tracking Error is 0.00%, it means we are perfectly tracking the benchmark. Do NOT invent active returns or alpha. State that the portfolio matches the benchmark exactly.
37
+ 2. **The Exclusion Rule**: If a stock or sector has "Status": "Excluded", NEVER refer to it as a "Holding". We don't own it.
38
+ 3. **The GICS Rule**: Adhere strictly to the "Sector" field provided in the input JSON. Do not hallucinate sectors.
39
+ 4. **Data Grounding**: Do not cite any data not present in the provided JSON "Truth Tables". Rely ONLY on the provided allocation dictionary.
40
+
41
  Your tone should be:
42
  1. Professional and reassuring.
43
+ 2. Mathematically precise (cite the numbers from the JSON).
44
  3. Explanatory (explain 'why' something happened).
45
+ """
46
 
47
+ GOLDMAN_RULES = """
48
  ## GOLDMAN RULES (STRICT COMPLIANCE)
49
  1. **The Exclusion Rule**: If a stock or sector has "Status": "Excluded", NEVER refer to it as a "Holding". We don't own it. Its negative contribution is a "Missed Opportunity" or "Drag from Benchbark Rally".
50
  2. **The Active Return Rule**: Only call a stock a "Contributor" if its "Active_Contribution" is POSITIVE.