vn6295337 Claude Opus 4.5 committed on
Commit
d2df6b8
·
1 Parent(s): efcfc92

Layer 1: Add metric reference table to prevent LLM hallucination

Browse files

- Add immutable metric reference table at top of analyzer prompts
- Use exact 'as of' dates (e.g., 2024-09-28) instead of ambiguous FY labels
- Store metric_reference dict and SHA256 hash in state for integrity verification
- Include reference table in both initial and revision prompts
- Instruct LLM to copy values verbatim, not round or estimate

New functions:
- _format_metric_for_reference(): Format single metric with as-of date
- _generate_metric_reference_table(): Build numbered reference table
- _compute_reference_hash(): SHA256 hash for integrity check
- _verify_reference_integrity(): Verify dict hasn't been corrupted

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

Files changed (2) hide show
  1. src/nodes/analyzer.py +209 -12
  2. src/state.py +3 -0
src/nodes/analyzer.py CHANGED
@@ -734,6 +734,167 @@ def _format_metrics_for_prompt(extracted: dict, is_financial: bool = False) -> s
734
  return "\n".join(lines)
735
 
736
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
737
  # New institutional-grade prompt
738
  ANALYZER_SYSTEM_PROMPT = """You are a senior financial analyst producing institutional-grade SWOT analyses.
739
 
@@ -776,7 +937,8 @@ def _build_revision_prompt(
776
  critique_details: dict,
777
  company_data: str,
778
  current_draft: str,
779
- is_financial: bool
 
780
  ) -> str:
781
  """Build revision prompt with conditional focus areas based on failed criteria.
782
 
@@ -785,10 +947,15 @@ def _build_revision_prompt(
785
  company_data: Formatted metrics string for reference
786
  current_draft: The current SWOT draft to be revised
787
  is_financial: Whether the company is a financial institution
 
788
 
789
  Returns:
790
  Complete revision prompt string
791
  """
 
 
 
 
792
  scores = critique_details.get("scores", {})
793
 
794
  # Determine which focus areas to include based on failed criteria
@@ -828,7 +995,7 @@ def _build_revision_prompt(
828
  if is_financial:
829
  ev_note = "\n**Note:** This is a financial institution - EV/EBITDA is excluded from analysis."
830
 
831
- prompt = f"""## REVISION MODE ACTIVATED
832
 
833
  You previously generated a SWOT analysis that did not meet quality standards. You are now in revision mode.
834
 
@@ -838,6 +1005,7 @@ You previously generated a SWOT analysis that did not meet quality standards. Yo
838
  2. **Address each deficiency** listed in priority order
839
  3. **Preserve strengths** explicitly called out β€” do not regress on what worked
840
  4. **Regenerate the complete SWOT** β€” not a partial patch
 
841
 
842
  ### CRITIC FEEDBACK
843
 
@@ -863,13 +1031,14 @@ Weighted Score: {critique_details.get('weighted_score', 0):.1f} / 10
863
  - Fix every item in "Key Deficiencies" β€” these are blocking issues
864
  - Apply each point in "Actionable Feedback" β€” these are specific instructions
865
  - Keep everything listed under "Strengths to Preserve" β€” do not modify these sections
866
- - Re-verify all metric citations against the original input data
867
- - Ensure temporal labels (TTM, FY, Q) are accurate for each metric
868
  {ev_note}
869
 
870
  **DO NOT:**
871
  - Ignore lower-priority feedback items β€” address all of them
872
  - Introduce new metrics not in the original input data
 
873
  - Remove content that was working well
874
  - Add defensive caveats or apologies about the revision
875
  - Reference the revision process in your output β€” produce a clean SWOT as if first attempt
@@ -896,8 +1065,28 @@ Simply output the improved SWOT as a clean, final deliverable."""
896
  return prompt
897
 
898
 
899
- def _build_analyzer_prompt(company: str, ticker: str, formatted_data: str, is_financial: bool) -> str:
900
- """Build analyzer prompt with conditional EV/EBITDA handling."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
901
 
902
  if is_financial:
903
  ev_note = "\nNote: EV/EBITDA excluded - not meaningful for financial institutions."
@@ -906,7 +1095,7 @@ def _build_analyzer_prompt(company: str, ticker: str, formatted_data: str, is_fi
906
 
907
  system = ANALYZER_SYSTEM_PROMPT.format(ev_ebitda_note=ev_note)
908
 
909
- return f"""{system}
910
 
911
  === DATA FOR {company} ({ticker}) ===
912
  {formatted_data}
@@ -917,13 +1106,13 @@ Produce a SWOT analysis with this exact structure:
917
 
918
  ## Strengths
919
  For each (3-5 points):
920
- - **Finding:** [One sentence with specific metric]
921
  - **Strategic Implication:** [Why this matters]
922
  - **Durability:** [High/Medium/Low]
923
 
924
  ## Weaknesses
925
  For each (3-5 points):
926
- - **Finding:** [One sentence with specific metric]
927
  - **Severity:** [Critical/Moderate/Minor]
928
  - **Trend:** [Improving/Stable/Deteriorating]
929
  - **Remediation Levers:** [What could improve this]
@@ -946,7 +1135,9 @@ For each (3-5 points):
946
  - **Data Gaps:** [Any unavailable metrics]
947
  - **Confidence Level:** [High/Medium/Low]
948
 
949
- Every finding MUST cite a specific number from the data."""
 
 
950
 
951
 
952
  @traceable(name="Analyzer")
@@ -1004,7 +1195,8 @@ def analyzer_node(state, workflow_id=None, progress_store=None):
1004
  critique_details=critique_details,
1005
  company_data=formatted_data,
1006
  current_draft=state.get("draft_report", ""),
1007
- is_financial=is_financial
 
1008
  )
1009
 
1010
  # Update progress with revision info
@@ -1017,7 +1209,12 @@ def analyzer_node(state, workflow_id=None, progress_store=None):
1017
  # INITIAL MODE: Use standard analyzer prompt
1018
  _add_activity_log(workflow_id, progress_store, "analyzer",
1019
  f"Calling LLM to generate SWOT analysis...")
1020
- prompt = _build_analyzer_prompt(company, ticker, formatted_data, is_financial)
 
 
 
 
 
1021
  current_revision = 0
1022
 
1023
  # In revision mode, add delay before LLM call to avoid rate limits
 
734
  return "\n".join(lines)
735
 
736
 
737
+ # ============================================================
738
+ # METRIC REFERENCE TABLE - For Hallucination Prevention (Layer 1)
739
+ # ============================================================
740
+
741
+ import hashlib
742
+
743
+
744
+ def _format_metric_for_reference(key: str, value, temporal_info: dict = None) -> tuple:
745
+ """
746
+ Format a single metric for the reference table with exact as-of date.
747
+
748
+ Returns:
749
+ tuple: (formatted_string, as_of_date)
750
+ """
751
+ if value is None:
752
+ return None, None
753
+
754
+ # Format value based on metric type
755
+ if key in ("revenue", "net_income", "free_cash_flow", "market_cap", "enterprise_value",
756
+ "total_assets", "total_liabilities", "stockholders_equity", "operating_cash_flow"):
757
+ formatted = f"${value:,.0f}"
758
+ elif key in ("net_margin", "gross_margin", "operating_margin", "gdp_growth",
759
+ "inflation", "unemployment", "historical_volatility", "revenue_cagr_3yr"):
760
+ formatted = f"{value:.1f}%"
761
+ elif key in ("interest_rate",):
762
+ formatted = f"{value:.2f}%"
763
+ elif key in ("pe_trailing", "pe_forward", "ps_ratio", "ev_ebitda", "vix"):
764
+ formatted = f"{value:.1f}"
765
+ elif key in ("pb_ratio", "debt_to_equity", "beta"):
766
+ formatted = f"{value:.2f}"
767
+ elif key in ("eps",):
768
+ formatted = f"${value:.2f}"
769
+ elif key in ("composite_score",):
770
+ formatted = f"{value:.1f}"
771
+ else:
772
+ # Default formatting for unknown metrics
773
+ if isinstance(value, float):
774
+ formatted = f"{value:.2f}"
775
+ else:
776
+ formatted = str(value)
777
+
778
+ # Extract actual date (not fiscal period label)
779
+ as_of_date = None
780
+ if temporal_info and isinstance(temporal_info, dict):
781
+ as_of_date = temporal_info.get("end_date") # e.g., "2024-09-28"
782
+
783
+ if as_of_date:
784
+ formatted = f"{formatted} (as of {as_of_date})"
785
+
786
+ return formatted, as_of_date
787
+
788
+
789
+ def _generate_metric_reference_table(extracted: dict, is_financial: bool = False) -> tuple:
790
+ """
791
+ Generate an immutable metric reference table for LLM grounding.
792
+
793
+ Args:
794
+ extracted: Extracted metrics dictionary from _extract_key_metrics()
795
+ is_financial: If True, exclude EV/EBITDA
796
+
797
+ Returns:
798
+ tuple: (table_string, metric_lookup_dict)
799
+ """
800
+ lines = [
801
+ "=" * 60,
802
+ "METRIC REFERENCE TABLE - COPY VALUES EXACTLY AS SHOWN",
803
+ "=" * 60,
804
+ "",
805
+ "CRITICAL INSTRUCTION:",
806
+ "- Copy metric values EXACTLY as shown (including $, %, decimals)",
807
+ "- Do NOT round, estimate, or approximate numbers",
808
+ "- Do NOT invent metrics not listed below",
809
+ "- Include the 'as of' date when citing temporal metrics",
810
+ "",
811
+ ]
812
+
813
+ lookup = {}
814
+ mid = 1
815
+
816
+ # Define categories and their metric keys
817
+ categories = [
818
+ ("FUNDAMENTALS", "fundamentals", [
819
+ "revenue", "net_income", "net_margin", "gross_margin", "operating_margin",
820
+ "eps", "debt_to_equity", "free_cash_flow", "revenue_cagr_3yr"
821
+ ]),
822
+ ("VALUATION", "valuation", [
823
+ "pe_trailing", "pe_forward", "pb_ratio", "ps_ratio", "ev_ebitda"
824
+ ]),
825
+ ("VOLATILITY", "volatility", [
826
+ "beta", "vix", "historical_volatility"
827
+ ]),
828
+ ("MACRO", "macro", [
829
+ "gdp_growth", "interest_rate", "inflation", "unemployment"
830
+ ]),
831
+ ]
832
+
833
+ for label, cat_key, metric_keys in categories:
834
+ data = extracted.get(cat_key, {})
835
+ if not data:
836
+ continue
837
+
838
+ category_lines = []
839
+
840
+ for metric_key in metric_keys:
841
+ metric_val = data.get(metric_key)
842
+ if metric_val is None:
843
+ continue
844
+
845
+ # Skip EV/EBITDA for financial institutions
846
+ if is_financial and metric_key == "ev_ebitda":
847
+ continue
848
+
849
+ # Handle temporal metrics (dict with value and end_date)
850
+ if isinstance(metric_val, dict) and metric_val.get("value") is not None:
851
+ raw_value = metric_val["value"]
852
+ formatted, as_of_date = _format_metric_for_reference(
853
+ metric_key, raw_value, metric_val
854
+ )
855
+ elif isinstance(metric_val, (int, float)):
856
+ raw_value = metric_val
857
+ formatted, as_of_date = _format_metric_for_reference(metric_key, raw_value)
858
+ else:
859
+ continue # Skip non-numeric
860
+
861
+ if formatted:
862
+ ref_id = f"M{mid:02d}"
863
+ category_lines.append(f" {ref_id}: {metric_key} = {formatted}")
864
+ lookup[ref_id] = {
865
+ "key": metric_key,
866
+ "raw_value": raw_value,
867
+ "formatted": formatted,
868
+ "as_of_date": as_of_date,
869
+ "category": cat_key
870
+ }
871
+ mid += 1
872
+
873
+ if category_lines:
874
+ lines.append(f"[{label}]")
875
+ lines.extend(category_lines)
876
+ lines.append("")
877
+
878
+ lines.append("=" * 60)
879
+ lines.append("")
880
+
881
+ return "\n".join(lines), lookup
882
+
883
+
884
+ def _compute_reference_hash(metric_lookup: dict) -> str:
885
+ """Compute SHA256 hash of metric lookup for integrity verification."""
886
+ # Sort keys for deterministic serialization
887
+ serialized = json.dumps(metric_lookup, sort_keys=True, default=str)
888
+ return hashlib.sha256(serialized.encode()).hexdigest()
889
+
890
+
891
+ def _verify_reference_integrity(metric_lookup: dict, stored_hash: str) -> bool:
892
+ """Verify metric lookup hasn't been corrupted."""
893
+ if not metric_lookup or not stored_hash:
894
+ return False
895
+ return _compute_reference_hash(metric_lookup) == stored_hash
896
+
897
+
898
  # New institutional-grade prompt
899
  ANALYZER_SYSTEM_PROMPT = """You are a senior financial analyst producing institutional-grade SWOT analyses.
900
 
 
937
  critique_details: dict,
938
  company_data: str,
939
  current_draft: str,
940
+ is_financial: bool,
941
+ extracted: dict = None
942
  ) -> str:
943
  """Build revision prompt with conditional focus areas based on failed criteria.
944
 
 
947
  company_data: Formatted metrics string for reference
948
  current_draft: The current SWOT draft to be revised
949
  is_financial: Whether the company is a financial institution
950
+ extracted: Extracted metrics dict for reference table generation
951
 
952
  Returns:
953
  Complete revision prompt string
954
  """
955
+ # Generate metric reference table for revision (same as initial mode)
956
+ reference_table = ""
957
+ if extracted:
958
+ reference_table, _ = _generate_metric_reference_table(extracted, is_financial)
959
  scores = critique_details.get("scores", {})
960
 
961
  # Determine which focus areas to include based on failed criteria
 
995
  if is_financial:
996
  ev_note = "\n**Note:** This is a financial institution - EV/EBITDA is excluded from analysis."
997
 
998
+ prompt = f"""{reference_table}## REVISION MODE ACTIVATED
999
 
1000
  You previously generated a SWOT analysis that did not meet quality standards. You are now in revision mode.
1001
 
 
1005
  2. **Address each deficiency** listed in priority order
1006
  3. **Preserve strengths** explicitly called out β€” do not regress on what worked
1007
  4. **Regenerate the complete SWOT** β€” not a partial patch
1008
+ 5. **Use EXACT values from the METRIC REFERENCE TABLE above** β€” do not round or estimate
1009
 
1010
  ### CRITIC FEEDBACK
1011
 
 
1031
  - Fix every item in "Key Deficiencies" β€” these are blocking issues
1032
  - Apply each point in "Actionable Feedback" β€” these are specific instructions
1033
  - Keep everything listed under "Strengths to Preserve" β€” do not modify these sections
1034
+ - **Use EXACT metric values from the METRIC REFERENCE TABLE** β€” copy numbers verbatim
1035
+ - Include the 'as of' date when citing temporal metrics
1036
  {ev_note}
1037
 
1038
  **DO NOT:**
1039
  - Ignore lower-priority feedback items β€” address all of them
1040
  - Introduce new metrics not in the original input data
1041
+ - **Round, estimate, or approximate any numbers** β€” use exact values only
1042
  - Remove content that was working well
1043
  - Add defensive caveats or apologies about the revision
1044
  - Reference the revision process in your output β€” produce a clean SWOT as if first attempt
 
1065
  return prompt
1066
 
1067
 
1068
+ def _build_analyzer_prompt(company: str, ticker: str, formatted_data: str,
1069
+ is_financial: bool, extracted: dict = None) -> tuple:
1070
+ """Build analyzer prompt with metric reference table for hallucination prevention.
1071
+
1072
+ Args:
1073
+ company: Company name
1074
+ ticker: Stock ticker
1075
+ formatted_data: Formatted metrics text
1076
+ is_financial: If True, exclude EV/EBITDA
1077
+ extracted: Extracted metrics dict (for reference table generation)
1078
+
1079
+ Returns:
1080
+ tuple: (prompt_string, metric_lookup_dict, reference_hash)
1081
+ """
1082
+ # Generate metric reference table if extracted data is available
1083
+ reference_table = ""
1084
+ metric_lookup = {}
1085
+ ref_hash = ""
1086
+
1087
+ if extracted:
1088
+ reference_table, metric_lookup = _generate_metric_reference_table(extracted, is_financial)
1089
+ ref_hash = _compute_reference_hash(metric_lookup)
1090
 
1091
  if is_financial:
1092
  ev_note = "\nNote: EV/EBITDA excluded - not meaningful for financial institutions."
 
1095
 
1096
  system = ANALYZER_SYSTEM_PROMPT.format(ev_ebitda_note=ev_note)
1097
 
1098
+ prompt = f"""{reference_table}{system}
1099
 
1100
  === DATA FOR {company} ({ticker}) ===
1101
  {formatted_data}
 
1106
 
1107
  ## Strengths
1108
  For each (3-5 points):
1109
+ - **Finding:** [One sentence with specific metric from the METRIC REFERENCE TABLE]
1110
  - **Strategic Implication:** [Why this matters]
1111
  - **Durability:** [High/Medium/Low]
1112
 
1113
  ## Weaknesses
1114
  For each (3-5 points):
1115
+ - **Finding:** [One sentence with specific metric from the METRIC REFERENCE TABLE]
1116
  - **Severity:** [Critical/Moderate/Minor]
1117
  - **Trend:** [Improving/Stable/Deteriorating]
1118
  - **Remediation Levers:** [What could improve this]
 
1135
  - **Data Gaps:** [Any unavailable metrics]
1136
  - **Confidence Level:** [High/Medium/Low]
1137
 
1138
+ CRITICAL: Every numeric finding MUST use the EXACT value from the METRIC REFERENCE TABLE above. Do NOT round or estimate."""
1139
+
1140
+ return prompt, metric_lookup, ref_hash
1141
 
1142
 
1143
  @traceable(name="Analyzer")
 
1195
  critique_details=critique_details,
1196
  company_data=formatted_data,
1197
  current_draft=state.get("draft_report", ""),
1198
+ is_financial=is_financial,
1199
+ extracted=extracted
1200
  )
1201
 
1202
  # Update progress with revision info
 
1209
  # INITIAL MODE: Use standard analyzer prompt
1210
  _add_activity_log(workflow_id, progress_store, "analyzer",
1211
  f"Calling LLM to generate SWOT analysis...")
1212
+ prompt, metric_lookup, ref_hash = _build_analyzer_prompt(
1213
+ company, ticker, formatted_data, is_financial, extracted
1214
+ )
1215
+ # Store metric reference for validation (Layer 1 hallucination prevention)
1216
+ state["metric_reference"] = metric_lookup
1217
+ state["metric_reference_hash"] = ref_hash
1218
  current_revision = 0
1219
 
1220
  # In revision mode, add delay before LLM call to avoid rate limits
src/state.py CHANGED
@@ -22,3 +22,6 @@ class AgentState(TypedDict):
22
  progress_store: Optional[Any] # Reference to WORKFLOWS dict
23
  # Error handling - abort workflow on critical failures
24
  error: Optional[str] # Set when LLM providers fail, causes workflow to abort
 
 
 
 
22
  progress_store: Optional[Any] # Reference to WORKFLOWS dict
23
  # Error handling - abort workflow on critical failures
24
  error: Optional[str] # Set when LLM providers fail, causes workflow to abort
25
+ # Metric reference for hallucination prevention (Layer 1)
26
+ metric_reference: Optional[dict] # {M01: {key, raw_value, formatted, as_of_date}, ...}
27
+ metric_reference_hash: Optional[str] # SHA256 hash for integrity verification