Spaces:
Running
Running
File size: 5,509 Bytes
b0e15c1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 | """
Reference data extractor from Processed_data.xlsx
This module extracts BM Index, Category Average, and fund weightage data that the advisor
has manually filled in Processed_data.xlsx, so we can use it when processing
raw CSV files that have blank BM/Category rows.
"""
import openpyxl
from typing import Dict, Any, Optional, Tuple
from pathlib import Path
def extract_reference_data(processed_xlsx_path: str) -> Tuple[Dict[str, Dict[str, Any]], Dict[str, Dict[str, Any]], Dict[str, int]]:
"""
Extract BM Index, Category Average, and fund weightage data from Processed_data.xlsx.
Returns:
(bm_data, cat_avg_data, fund_weightages) where:
- bm_data: dict mapping category name to CAGR values for BM Index
- cat_avg_data: dict mapping category name to CAGR/values for Category Average
- fund_weightages: dict mapping fund name to manually adjusted weightage value
"""
xlsx_path = Path(processed_xlsx_path)
if not xlsx_path.exists():
print(f"Warning: Reference file not found: {processed_xlsx_path}")
return {}, {}, {}
wb = openpyxl.load_workbook(str(xlsx_path))
ws = wb.active
bm_data = {}
cat_avg_data = {}
fund_weightages = {}
current_category = None
# Find the Weightage column index by scanning the header row
weightage_col_idx = None
for col_idx in range(1, ws.max_column + 1):
header_val = ws.cell(1, col_idx).value
if header_val and 'Weightage' in str(header_val):
weightage_col_idx = col_idx
break
for i in range(1, ws.max_row + 1):
cell_val = ws.cell(i, 1).value
# Check if it's a category header
if cell_val and ':' in str(cell_val) and any(x in str(cell_val) for x in ['Equity', 'Debt', 'Hybrid', 'Solution', 'Other']):
current_category = cell_val
# Check if it's BM Index row
elif cell_val == 'BM Index' and current_category:
bm_1y = ws.cell(i, 6).value
bm_3y = ws.cell(i, 7).value
bm_5y = ws.cell(i, 8).value
bm_10y = ws.cell(i, 9).value
# Only store if at least one value is present
if any([bm_1y, bm_3y, bm_5y, bm_10y]):
bm_data[current_category] = {
'cagr_1y': bm_1y,
'cagr_3y': bm_3y,
'cagr_5y': bm_5y,
'cagr_10y': bm_10y
}
# Check if it's Category Average row
elif cell_val == 'Category Average' and current_category:
cat_1y = ws.cell(i, 6).value
cat_3y = ws.cell(i, 7).value
cat_5y = ws.cell(i, 8).value
cat_10y = ws.cell(i, 9).value
pe = ws.cell(i, 12).value
pb = ws.cell(i, 13).value
# Only store if at least one CAGR value is present
if any([cat_1y, cat_3y, cat_5y, cat_10y]):
cat_avg_data[current_category] = {
'cagr_1y': cat_1y,
'cagr_3y': cat_3y,
'cagr_5y': cat_5y,
'cagr_10y': cat_10y,
'pe_ratio': pe,
'pb_ratio': pb
}
# Check if it's a fund row (not category header, BM Index, or Category Average)
elif cell_val and cell_val not in ['BM Index', 'Category Average', 'Fund'] and current_category:
# Extract fund name
fund_name = str(cell_val).strip()
# Extract weightage if we found the Weightage column
if weightage_col_idx:
weightage_val = ws.cell(i, weightage_col_idx).value
if weightage_val is not None:
try:
# Convert to int if possible, otherwise round float to nearest int
if isinstance(weightage_val, float):
fund_weightages[fund_name] = int(round(weightage_val))
else:
fund_weightages[fund_name] = int(weightage_val)
except (ValueError, TypeError):
# If conversion fails, skip this fund
pass
wb.close()
print(f"Loaded reference data: {len(bm_data)} categories with BM Index, {len(cat_avg_data)} with Category Average, {len(fund_weightages)} fund weightages")
return bm_data, cat_avg_data, fund_weightages
def get_fund_weightage_from_reference(fund_name: str, fund_weightages: Dict[str, int]) -> Optional[int]:
"""
Get the manually adjusted weightage for a fund from reference data.
Args:
fund_name: Name of the fund
fund_weightages: Dictionary of fund name to weightage from Processed_data.xlsx
Returns:
Weightage value if found, None otherwise
"""
# Try exact match first
if fund_name in fund_weightages:
return fund_weightages[fund_name]
# Try partial match (in case of slight name differences)
for ref_fund_name, weightage in fund_weightages.items():
if fund_name.lower() in ref_fund_name.lower() or ref_fund_name.lower() in fund_name.lower():
return weightage
return None
# Default reference file path
DEFAULT_REFERENCE_PATH = "PS/Processed data.xlsx"
# Default reference file path
DEFAULT_REFERENCE_PATH = "PS/Processed data.xlsx"
|