Spaces:

Parthiban97
/

MF

Running

App Files Files Community

MF / src /reference_data.py

Parthiban97

Upload 15 files

b0e15c1 verified 4 days ago

raw

history blame contribute delete

5.51 kB

	"""
	Reference data extractor from Processed_data.xlsx

	This module extracts BM Index, Category Average, and fund weightage data that the advisor
	has manually filled in Processed_data.xlsx, so we can use it when processing
	raw CSV files that have blank BM/Category rows.
	"""

	import openpyxl
	from typing import Dict, Any, Optional, Tuple
	from pathlib import Path


	def extract_reference_data(processed_xlsx_path: str) -> Tuple[Dict[str, Dict[str, Any]], Dict[str, Dict[str, Any]], Dict[str, int]]:
	"""
	Extract BM Index, Category Average, and fund weightage data from Processed_data.xlsx.

	Returns:
	(bm_data, cat_avg_data, fund_weightages) where:
	- bm_data: dict mapping category name to CAGR values for BM Index
	- cat_avg_data: dict mapping category name to CAGR/values for Category Average
	- fund_weightages: dict mapping fund name to manually adjusted weightage value
	"""
	xlsx_path = Path(processed_xlsx_path)
	if not xlsx_path.exists():
	print(f"Warning: Reference file not found: {processed_xlsx_path}")
	return {}, {}, {}

	wb = openpyxl.load_workbook(str(xlsx_path))
	ws = wb.active

	bm_data = {}
	cat_avg_data = {}
	fund_weightages = {}
	current_category = None

	# Find the Weightage column index by scanning the header row
	weightage_col_idx = None
	for col_idx in range(1, ws.max_column + 1):
	header_val = ws.cell(1, col_idx).value
	if header_val and 'Weightage' in str(header_val):
	weightage_col_idx = col_idx
	break

	for i in range(1, ws.max_row + 1):
	cell_val = ws.cell(i, 1).value

	# Check if it's a category header
	if cell_val and ':' in str(cell_val) and any(x in str(cell_val) for x in ['Equity', 'Debt', 'Hybrid', 'Solution', 'Other']):
	current_category = cell_val

	# Check if it's BM Index row
	elif cell_val == 'BM Index' and current_category:
	bm_1y = ws.cell(i, 6).value
	bm_3y = ws.cell(i, 7).value
	bm_5y = ws.cell(i, 8).value
	bm_10y = ws.cell(i, 9).value

	# Only store if at least one value is present
	if any([bm_1y, bm_3y, bm_5y, bm_10y]):
	bm_data[current_category] = {
	'cagr_1y': bm_1y,
	'cagr_3y': bm_3y,
	'cagr_5y': bm_5y,
	'cagr_10y': bm_10y
	}

	# Check if it's Category Average row
	elif cell_val == 'Category Average' and current_category:
	cat_1y = ws.cell(i, 6).value
	cat_3y = ws.cell(i, 7).value
	cat_5y = ws.cell(i, 8).value
	cat_10y = ws.cell(i, 9).value
	pe = ws.cell(i, 12).value
	pb = ws.cell(i, 13).value

	# Only store if at least one CAGR value is present
	if any([cat_1y, cat_3y, cat_5y, cat_10y]):
	cat_avg_data[current_category] = {
	'cagr_1y': cat_1y,
	'cagr_3y': cat_3y,
	'cagr_5y': cat_5y,
	'cagr_10y': cat_10y,
	'pe_ratio': pe,
	'pb_ratio': pb
	}

	# Check if it's a fund row (not category header, BM Index, or Category Average)
	elif cell_val and cell_val not in ['BM Index', 'Category Average', 'Fund'] and current_category:
	# Extract fund name
	fund_name = str(cell_val).strip()

	# Extract weightage if we found the Weightage column
	if weightage_col_idx:
	weightage_val = ws.cell(i, weightage_col_idx).value
	if weightage_val is not None:
	try:
	# Convert to int if possible, otherwise round float to nearest int
	if isinstance(weightage_val, float):
	fund_weightages[fund_name] = int(round(weightage_val))
	else:
	fund_weightages[fund_name] = int(weightage_val)
	except (ValueError, TypeError):
	# If conversion fails, skip this fund
	pass

	wb.close()

	print(f"Loaded reference data: {len(bm_data)} categories with BM Index, {len(cat_avg_data)} with Category Average, {len(fund_weightages)} fund weightages")

	return bm_data, cat_avg_data, fund_weightages


	def get_fund_weightage_from_reference(fund_name: str, fund_weightages: Dict[str, int]) -> Optional[int]:
	"""
	Get the manually adjusted weightage for a fund from reference data.

	Args:
	fund_name: Name of the fund
	fund_weightages: Dictionary of fund name to weightage from Processed_data.xlsx

	Returns:
	Weightage value if found, None otherwise
	"""
	# Try exact match first
	if fund_name in fund_weightages:
	return fund_weightages[fund_name]

	# Try partial match (in case of slight name differences)
	for ref_fund_name, weightage in fund_weightages.items():
	if fund_name.lower() in ref_fund_name.lower() or ref_fund_name.lower() in fund_name.lower():
	return weightage

	return None


	# Default reference file path
	DEFAULT_REFERENCE_PATH = "PS/Processed data.xlsx"


	# Default reference file path
	DEFAULT_REFERENCE_PATH = "PS/Processed data.xlsx"