Spaces:

AJAYKASU
/

QuantScaleAI

Sleeping

App Files Files Community

AJAY KASU committed on Feb 27

Commit

2750cce

1 Parent(s): b14afd7

Refactor: Replace regex sector exclusion with LLM-based Intent Parser

Browse files

Files changed (7) hide show

ai/ai_reporter.py +33 -0
ai/prompts.py +30 -1
api/app.py +11 -0
core/schema.py +1 -0
data/optimizer.py +3 -11
main.py +6 -0
streamlit_app.py +3 -32

ai/ai_reporter.py CHANGED Viewed

@@ -77,3 +77,36 @@ INSTRUCTION: Start your commentary exactly with the header: "Market Commentary -
         except Exception as e:
             logger.error(f"Failed to generate AI report: {e}")
             return "Error generating commentary. Please check API connection."

         except Exception as e:
             logger.error(f"Failed to generate AI report: {e}")
             return "Error generating commentary. Please check API connection."
+    def parse_intent(self, user_prompt: str) -> list:
+        """
+        Uses LLM to map user prompt to a list of exact GICS sectors to exclude.
+        """
+        if not self.client:
+            logger.warning("LLM Client unavailable for Intent Parsing. Falling back to empty list.")
+            return []
+        from ai.prompts import INTENT_PARSER_SYSTEM_PROMPT
+        try:
+            response = self.client.chat_completion(
+                model=self.model_id,
+                messages=[
+                    {"role": "system", "content": INTENT_PARSER_SYSTEM_PROMPT},
+                    {"role": "user", "content": f"Parse this prompt for sector exclusions: '{user_prompt}'"}
+                ],
+                max_tokens=100,
+                temperature=0.0 # Strict output
+            )
+            content = response.choices[0].message.content.strip()
+            # Find the JSON list in the response
+            import re
+            match = re.search(r'\[.*\]', content, re.DOTALL)
+            if match:
+                import json
+                return json.loads(match.group(0))
+            return []
+        except Exception as e:
+            logger.error(f"Intent Parsing failed: {e}")
+            return []

ai/prompts.py CHANGED Viewed

@@ -1,5 +1,34 @@
 # System Prompt for the Portfolio Manager Persona
-# System Prompt for the Portfolio Manager Persona
 SYSTEM_PROMPT = """You are a Senior Portfolio Manager at a top-tier Asset Management firm (e.g., Goldman Sachs, BlackRock).
 Your goal is to write a concise, professional, and insightful performance commentary for a High Net Worth Application.
 Your tone should be:

 # System Prompt for the Portfolio Manager Persona
+# System Prompt for the Intent Parser
+INTENT_PARSER_SYSTEM_PROMPT = """You are a financial data parser.
+Your task is to identify which of the following 11 GICS sectors a user wants to EXCLUDE from their portfolio based on their prompt.
+GICS Sectors:
+1. Information Technology
+2. Health Care
+3. Financials
+4. Consumer Discretionary
+5. Communication Services
+6. Industrials
+7. Consumer Staples
+8. Energy
+9. Utilities
+10. Real Estate
+11. Materials
+## RULES:
+1. Return ONLY a valid JSON list of strings from the 11 GICS sectors above.
+2. If the user mentions "tech", map it to "Information Technology".
+3. If the user mentions "banks" or "finance", map it to "Financials".
+4. If the user mentions "healthcare" or "pharma", map it to "Health Care".
+5. If the user doesn't want to exclude any sectors, return [].
+6. Do NOT include any explanations or extra text.
+Example:
+User: "no tech or banks"
+Output: ["Information Technology", "Financials"]
+"""
 SYSTEM_PROMPT = """You are a Senior Portfolio Manager at a top-tier Asset Management firm (e.g., Goldman Sachs, BlackRock).
 Your goal is to write a concise, professional, and insightful performance commentary for a High Net Worth Application.
 Your tone should be:

api/app.py CHANGED Viewed

@@ -26,12 +26,23 @@ def root():
 def health_check():
     return {"status": "healthy", "service": "QuantScale AI Direct Indexing"}
 @app.post("/optimize", response_model=dict)
 def optimize_portfolio(request: OptimizationRequest):
     """
     Optimizes a portfolio based on exclusions and generates an AI Attribution report.
     """
     try:
         result = system.run_pipeline(request)
         if not result:
             raise HTTPException(status_code=500, detail="Pipeline failed to execute.")

 def health_check():
     return {"status": "healthy", "service": "QuantScale AI Direct Indexing"}
+def parse_constraints_with_llm(user_prompt: str) -> list:
+    """
+    Dedicated parser function in the API layer.
+    Maps natural language to exact GICS sectors.
+    """
+    return system.ai_reporter.parse_intent(user_prompt)
 @app.post("/optimize", response_model=dict)
 def optimize_portfolio(request: OptimizationRequest):
     """
     Optimizes a portfolio based on exclusions and generates an AI Attribution report.
     """
     try:
+        # If the request contains a raw prompt but no sectors, parse it here
+        if request.user_prompt and not request.excluded_sectors:
+             request.excluded_sectors = parse_constraints_with_llm(request.user_prompt)
         result = system.run_pipeline(request)
         if not result:
             raise HTTPException(status_code=500, detail="Pipeline failed to execute.")

core/schema.py CHANGED Viewed

@@ -30,6 +30,7 @@ class OptimizationRequest(BaseModel):
     strategy: Optional[str] = Field(None, description="Global Filter Strategy: 'smallest_market_cap' or 'largest_market_cap'")
     top_n: Optional[int] = Field(None, description="Number of assets to select for strategy (e.g. 50)")
     benchmark: str = "^GSPC"
     class Config:
         json_schema_extra = {

     strategy: Optional[str] = Field(None, description="Global Filter Strategy: 'smallest_market_cap' or 'largest_market_cap'")
     top_n: Optional[int] = Field(None, description="Number of assets to select for strategy (e.g. 50)")
     benchmark: str = "^GSPC"
+    user_prompt: Optional[str] = Field(None, description="Raw user input for LLM intent parsing")
     class Config:
         json_schema_extra = {

data/optimizer.py CHANGED Viewed

@@ -74,17 +74,9 @@ class PortfolioOptimizer:
             logger.info(f"Applying Sector Exclusion Validation for: {excluded_sectors}")
             for i, ticker in enumerate(tickers):
                 sector = sector_map.get(ticker, "Unknown")
-                # Normalize both for robust matching (e.g., "Health Care" vs "Healthcare")
-                sector_norm = sector.lower().replace(" ", "").replace("-", "")
-                for excl in excluded_sectors:
-                    excl_norm = excl.lower().replace(" ", "").replace("-", "")
-                    # Match if normalized strings are equal OR special mapping for Tech
-                    if excl_norm == sector_norm or (excl_norm == "tech" and sector_norm == "informationtechnology"):
-                        excluded_indices.append(i)
-                        mask_vector[i] = 1
-                        break
         # Ticker Exclusions (NEW)
         if excluded_tickers:

             logger.info(f"Applying Sector Exclusion Validation for: {excluded_sectors}")
             for i, ticker in enumerate(tickers):
                 sector = sector_map.get(ticker, "Unknown")
+                if sector in excluded_sectors:
+                    excluded_indices.append(i)
+                    mask_vector[i] = 1
         # Ticker Exclusions (NEW)
         if excluded_tickers:

main.py CHANGED Viewed

@@ -27,6 +27,12 @@ class QuantScaleSystem:
     def run_pipeline(self, request: OptimizationRequest):
         logger.info(f"Starting pipeline for Client {request.client_id}...")
         # 1. Fetch Universe (S&P 500)
         tickers = self.data_engine.fetch_sp500_tickers()

     def run_pipeline(self, request: OptimizationRequest):
         logger.info(f"Starting pipeline for Client {request.client_id}...")
+        # 0. LLM Intent Parsing (New)
+        if request.user_prompt and not request.excluded_sectors:
+            logger.info(f"Parsing user intent: '{request.user_prompt}'")
+            request.excluded_sectors = self.ai_reporter.parse_intent(request.user_prompt)
+            logger.info(f"LLM Mapped Exclusions: {request.excluded_sectors}")
         # 1. Fetch Universe (S&P 500)
         tickers = self.data_engine.fetch_sp500_tickers()

streamlit_app.py CHANGED Viewed

@@ -61,21 +61,6 @@ st.markdown("""
 </style>
 """, unsafe_allow_html=True)
-# --- Constants ---
-SECTOR_KEYWORDS = {
-    "Energy": ["energy", "oil", "gas"],
-    "Technology": ["technology", "tech", "software", "it"],
-    "Financials": ["financials", "finance", "banks"],
-    "Healthcare": ["healthcare", "health", "pharma"],
-    "Utilities": ["utilities", "utility"],
-    "Materials": ["materials", "mining"],
-    "Consumer Discretionary": ["consumer", "retail", "discretionary"],
-    "Real Estate": ["real estate", "reit"],
-    "Communication Services": ["communication", "media", "telecom"]
-}
-INCLUDE_KEYWORDS = ["keep", "include", "with", "stay", "portfolio", "only"]
 # --- Parsers ---
 def parse_investment_amount(text: str) -> float:
     text = text.replace(",", "")
@@ -89,20 +74,6 @@ def parse_investment_amount(text: str) -> float:
     return 100_000.0
-def parse_excluded_sectors(text: str) -> list:
-    lower = text.lower()
-    excluded = []
-    for sector, keywords in SECTOR_KEYWORDS.items():
-        if any(k in lower for k in keywords):
-            inc_pattern = re.compile(
-                rf'({"|".join(INCLUDE_KEYWORDS)})\s+(the\s+)?({"|".join([sector.lower()] + keywords)})',
-                re.IGNORECASE
-            )
-            if not inc_pattern.search(lower):
-                excluded.append(sector)
-    return excluded
 def parse_strategy(text: str):
     lower = text.lower()
     strategy, top_n = None, None
@@ -148,17 +119,17 @@ run_btn = st.button("🚀 Generate Portfolio Strategy", use_container_width=True
 if run_btn and user_input:
     investment_amount = parse_investment_amount(user_input)
-    excluded_sectors = parse_excluded_sectors(user_input)
     strategy, top_n = parse_strategy(user_input)
     request = OptimizationRequest(
         client_id="StreamlitUser",
         initial_investment=investment_amount,
-        excluded_sectors=excluded_sectors,
         excluded_tickers=[],
         strategy=strategy,
         top_n=top_n,
-        benchmark="^GSPC"
     )
     with st.spinner("⚙️ Running Convex Optimization & AI Analysis..."):

 </style>
 """, unsafe_allow_html=True)
 # --- Parsers ---
 def parse_investment_amount(text: str) -> float:
     text = text.replace(",", "")
     return 100_000.0
 def parse_strategy(text: str):
     lower = text.lower()
     strategy, top_n = None, None
 if run_btn and user_input:
     investment_amount = parse_investment_amount(user_input)
     strategy, top_n = parse_strategy(user_input)
     request = OptimizationRequest(
         client_id="StreamlitUser",
         initial_investment=investment_amount,
+        excluded_sectors=[], # Let the LLM derive this
         excluded_tickers=[],
         strategy=strategy,
         top_n=top_n,
+        benchmark="^GSPC",
+        user_prompt=user_input
     )
     with st.spinner("⚙️ Running Convex Optimization & AI Analysis..."):