Spaces:
Running
Running
AJAY KASU committed on
Commit ·
911b780
1
Parent(s): 5dafb0f
Refactor: Robust JSON extraction and grounded PM commentary for Bytez AI
Browse files
- ai/ai_reporter.py +42 -19
- ai/prompts.py +17 -8
ai/ai_reporter.py
CHANGED
|
@@ -26,7 +26,7 @@ class AIReporter:
|
|
| 26 |
self.model_path = "meta-llama/Meta-Llama-3-8B-Instruct"
|
| 27 |
self.endpoint = f"{self.base_url}/{self.model_path}"
|
| 28 |
|
| 29 |
-
def _call_bytez(self, messages: list, max_tokens: int = 500, temperature: float = 0.7) -> str:
|
| 30 |
"""
|
| 31 |
Helper to make the POST request to Bytez.
|
| 32 |
"""
|
|
@@ -41,7 +41,9 @@ class AIReporter:
|
|
| 41 |
payload = {
|
| 42 |
"messages": messages,
|
| 43 |
"max_tokens": max_tokens,
|
| 44 |
-
"temperature": temperature
|
|
|
|
|
|
|
| 45 |
}
|
| 46 |
|
| 47 |
try:
|
|
@@ -64,25 +66,40 @@ class AIReporter:
|
|
| 64 |
def parse_intent(self, user_prompt: str) -> list:
|
| 65 |
"""
|
| 66 |
Uses Bytez AI to map user prompt to a list of exact GICS sectors to exclude.
|
|
|
|
| 67 |
"""
|
| 68 |
logger.info(f"Parsing intent with Bytez for prompt: {user_prompt[:50]}...")
|
| 69 |
|
| 70 |
messages = [
|
| 71 |
{"role": "system", "content": INTENT_PARSER_SYSTEM_PROMPT},
|
| 72 |
-
{"role": "user", "content": f"Parse
|
| 73 |
]
|
| 74 |
|
| 75 |
try:
|
| 76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
if not content:
|
| 78 |
-
logger.warning("Empty response from Bytez for Intent Parsing.
|
| 79 |
return []
|
| 80 |
|
| 81 |
-
#
|
| 82 |
import re
|
|
|
|
| 83 |
match = re.search(r'\[.*\]', content.strip(), re.DOTALL)
|
| 84 |
if match:
|
| 85 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
return []
|
| 87 |
|
| 88 |
except Exception as e:
|
|
@@ -100,20 +117,26 @@ class AIReporter:
|
|
| 100 |
from datetime import datetime
|
| 101 |
current_date = datetime.now().strftime("%B %d, %Y")
|
| 102 |
|
|
|
|
|
|
|
|
|
|
| 103 |
# Format the user prompt
|
| 104 |
user_prompt = f"""
|
| 105 |
Current Date: {current_date}
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
if not self.api_key:
|
| 119 |
return f"AI Commentary Unavailable. (Missing BYTEZ_API_KEY). Current Date: {current_date}"
|
|
@@ -129,7 +152,7 @@ INSTRUCTION: Start your commentary exactly with the header: "Market Commentary -
|
|
| 129 |
if not commentary:
|
| 130 |
return "AI Commentary generation timed out or failed. Please try again."
|
| 131 |
|
| 132 |
-
return commentary
|
| 133 |
|
| 134 |
except Exception as e:
|
| 135 |
logger.error(f"Failed to generate Bytez report: {e}")
|
|
|
|
| 26 |
self.model_path = "meta-llama/Meta-Llama-3-8B-Instruct"
|
| 27 |
self.endpoint = f"{self.base_url}/{self.model_path}"
|
| 28 |
|
| 29 |
+
def _call_bytez(self, messages: list, max_tokens: int = 500, temperature: float = 0.7, top_p: float = 0.9) -> str:
|
| 30 |
"""
|
| 31 |
Helper to make the POST request to Bytez.
|
| 32 |
"""
|
|
|
|
| 41 |
payload = {
|
| 42 |
"messages": messages,
|
| 43 |
"max_tokens": max_tokens,
|
| 44 |
+
"temperature": temperature,
|
| 45 |
+
"top_p": top_p,
|
| 46 |
+
"response_format": {"type": "json_object"} if temperature < 0.2 else None
|
| 47 |
}
|
| 48 |
|
| 49 |
try:
|
|
|
|
| 66 |
def parse_intent(self, user_prompt: str) -> list:
|
| 67 |
"""
|
| 68 |
Uses Bytez AI to map user prompt to a list of exact GICS sectors to exclude.
|
| 69 |
+
Highly deterministic (temp=0.1) with robust regex extraction.
|
| 70 |
"""
|
| 71 |
logger.info(f"Parsing intent with Bytez for prompt: {user_prompt[:50]}...")
|
| 72 |
|
| 73 |
messages = [
|
| 74 |
{"role": "system", "content": INTENT_PARSER_SYSTEM_PROMPT},
|
| 75 |
+
{"role": "user", "content": f"Parse: '{user_prompt}'"}
|
| 76 |
]
|
| 77 |
|
| 78 |
try:
|
| 79 |
+
# Intent Parser uses low temperature and top_p for determinism
|
| 80 |
+
content = self._call_bytez(
|
| 81 |
+
messages,
|
| 82 |
+
max_tokens=100,
|
| 83 |
+
temperature=0.1
|
| 84 |
+
)
|
| 85 |
+
|
| 86 |
if not content:
|
| 87 |
+
logger.warning("Empty response from Bytez for Intent Parsing. Defaulting to [].")
|
| 88 |
return []
|
| 89 |
|
| 90 |
+
# Robust Regex Fallback Extraction
|
| 91 |
import re
|
| 92 |
+
# Look for the JSON list [ ... ]
|
| 93 |
match = re.search(r'\[.*\]', content.strip(), re.DOTALL)
|
| 94 |
if match:
|
| 95 |
+
extracted_json = match.group(0)
|
| 96 |
+
try:
|
| 97 |
+
return json.loads(extracted_json)
|
| 98 |
+
except json.JSONDecodeError as je:
|
| 99 |
+
logger.error(f"JSON Decode Error after extraction: {je}")
|
| 100 |
+
return []
|
| 101 |
+
|
| 102 |
+
logger.warning(f"No JSON list found in response: {content[:100]}...")
|
| 103 |
return []
|
| 104 |
|
| 105 |
except Exception as e:
|
|
|
|
| 117 |
from datetime import datetime
|
| 118 |
current_date = datetime.now().strftime("%B %d, %Y")
|
| 119 |
|
| 120 |
+
# Grounding check: treat a total active return of exactly 0 as full replication mode (the prompt below reports this same value as Tracking Error)
|
| 121 |
+
is_replication = attribution_report.total_active_return == 0
|
| 122 |
+
|
| 123 |
# Format the user prompt
|
| 124 |
user_prompt = f"""
|
| 125 |
Current Date: {current_date}
|
| 126 |
+
Portfolio Metadata:
|
| 127 |
+
- Sector Exclusions: {excluded_sector}
|
| 128 |
+
- Alpha (Active Return): {attribution_report.total_active_return * 100:.2f}%
|
| 129 |
+
- Total Tracking Error: {attribution_report.total_active_return * 100:.4f}%
|
| 130 |
+
- Full Replication Mode: {is_replication}
|
| 131 |
+
|
| 132 |
+
## DATA TABLES:
|
| 133 |
+
**Contributors/Detractors**:
|
| 134 |
+
{json.dumps(attribution_report.top_contributors[:5], indent=2)}
|
| 135 |
+
{json.dumps(attribution_report.top_detractors[:5], indent=2)}
|
| 136 |
+
|
| 137 |
+
**Sector Positioning**:
|
| 138 |
+
{json.dumps(attribution_report.sector_exposure, indent=2)}
|
| 139 |
+
"""
|
| 140 |
|
| 141 |
if not self.api_key:
|
| 142 |
return f"AI Commentary Unavailable. (Missing BYTEZ_API_KEY). Current Date: {current_date}"
|
|
|
|
| 152 |
if not commentary:
|
| 153 |
return "AI Commentary generation timed out or failed. Please try again."
|
| 154 |
|
| 155 |
+
return commentary.strip()
|
| 156 |
|
| 157 |
except Exception as e:
|
| 158 |
logger.error(f"Failed to generate Bytez report: {e}")
|
ai/prompts.py
CHANGED
|
@@ -18,24 +18,33 @@ GICS Sectors:
|
|
| 18 |
|
| 19 |
## RULES:
|
| 20 |
1. Return ONLY a valid JSON list of strings from the 11 GICS sectors above.
|
| 21 |
-
2.
|
| 22 |
-
3. If the user mentions "
|
| 23 |
-
4. If the user mentions "
|
| 24 |
-
5. If the user
|
| 25 |
-
6.
|
| 26 |
|
| 27 |
Example:
|
| 28 |
User: "no tech or banks"
|
| 29 |
Output: ["Information Technology", "Financials"]
|
| 30 |
"""
|
| 31 |
|
| 32 |
-
SYSTEM_PROMPT = """You are a Senior Portfolio Manager at a top-tier Asset Management firm
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
Your tone should be:
|
| 35 |
1. Professional and reassuring.
|
| 36 |
-
2. Mathematically precise (cite the numbers).
|
| 37 |
3. Explanatory (explain 'why' something happened).
|
|
|
|
| 38 |
|
|
|
|
| 39 |
## GOLDMAN RULES (STRICT COMPLIANCE)
|
| 40 |
1. **The Exclusion Rule**: If a stock or sector has "Status": "Excluded", NEVER refer to it as a "Holding". We don't own it. Its negative contribution is a "Missed Opportunity" or "Drag from Benchmark Rally".
|
| 41 |
2. **The Active Return Rule**: Only call a stock a "Contributor" if its "Active_Contribution" is POSITIVE.
|
|
|
|
| 18 |
|
| 19 |
## RULES:
|
| 20 |
1. Return ONLY a valid JSON list of strings from the 11 GICS sectors above.
|
| 21 |
+
2. OUTPUT ONLY VALID JSON. NO MARKDOWN BACKTICKS (```json). NO EXPLANATIONS.
|
| 22 |
+
3. If the user mentions "tech", map it to "Information Technology".
|
| 23 |
+
4. If the user mentions "banks" or "finance", map it to "Financials".
|
| 24 |
+
5. If the user mentions "healthcare" or "pharma", map it to "Health Care".
|
| 25 |
+
6. If the user doesn't want to exclude any sectors, return [].
|
| 26 |
|
| 27 |
Example:
|
| 28 |
User: "no tech or banks"
|
| 29 |
Output: ["Information Technology", "Financials"]
|
| 30 |
"""
|
| 31 |
|
| 32 |
+
SYSTEM_PROMPT = """You are a Senior Portfolio Manager at a top-tier Asset Management firm.
|
| 33 |
+
You are analyzing a direct indexing portfolio.
|
| 34 |
+
|
| 35 |
+
## GROUND RULES:
|
| 36 |
+
1. **The Tracking Error Rule**: If Tracking Error is 0.00%, it means we are perfectly tracking the benchmark. Do NOT invent active returns or alpha. State that the portfolio matches the benchmark exactly.
|
| 37 |
+
2. **The Exclusion Rule**: If a stock or sector has "Status": "Excluded", NEVER refer to it as a "Holding". We don't own it.
|
| 38 |
+
3. **The GICS Rule**: Adhere strictly to the "Sector" field provided in the input JSON. Do not hallucinate sectors.
|
| 39 |
+
4. **Data Grounding**: Do not cite any data not present in the provided JSON "Truth Tables". Rely ONLY on the provided allocation dictionary.
|
| 40 |
+
|
| 41 |
Your tone should be:
|
| 42 |
1. Professional and reassuring.
|
| 43 |
+
2. Mathematically precise (cite the numbers from the JSON).
|
| 44 |
3. Explanatory (explain 'why' something happened).
|
| 45 |
+
"""
|
| 46 |
|
| 47 |
+
GOLDMAN_RULES = """
|
| 48 |
## GOLDMAN RULES (STRICT COMPLIANCE)
|
| 49 |
1. **The Exclusion Rule**: If a stock or sector has "Status": "Excluded", NEVER refer to it as a "Holding". We don't own it. Its negative contribution is a "Missed Opportunity" or "Drag from Benchmark Rally".
|
| 50 |
2. **The Active Return Rule**: Only call a stock a "Contributor" if its "Active_Contribution" is POSITIVE.
|