Spaces:

Darshan03
/

Test-app

Sleeping

App Files Files Community

Darshan03 committed on Jan 28, 2025

Commit

96e7d53

verified ·

1 Parent(s): 0e754f2

Upload 3 files

Browse files

Files changed (3) hide show

portfolio.py +268 -0
scenario.py +145 -0
simluation_data.py +172 -0

portfolio.py ADDED Viewed

	@@ -0,0 +1,268 @@

+import pandas as pd
+import os
+import json
+import yfinance as yf
+from langchain_core.output_parsers import JsonOutputParser
+from pydantic import BaseModel, Field, ValidationError
+from typing import List, Optional, Dict
+from langchain_groq import ChatGroq
+from dataclasses import dataclass, field
+from dotenv import load_dotenv
+import pickle
+load_dotenv()  # Load environment variables from .env
+# Configuration: Move to configurations
class Config:
    """Central settings for the portfolio pipeline.

    The API keys are looked up in the process environment when the class
    body is evaluated; the other attributes are fixed paths and the list of
    sector names interpolated into the LLM schema description.
    """

    # Credentials; each is None when the variable is not set.
    ALPHA_VANTAGE_API_KEY = os.environ.get("ALPHA_VANTAGE_API_KEY")
    GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

    # Directory for the per-symbol CSV files and path of the portfolio JSON.
    STOCK_DATA_DIR = "stock_data_NSE"
    OUTPUT_FILE = "output_files/portfolio.json"

    # Kept as a list: its repr is embedded in a field description elsewhere.
    SECTORS = [
        "Communication Services", "Consumer Discretionary", "Consumer Staples",
        "Energy", "Financials", "Health Care",
        "Industrials", "Information Technology", "Materials",
        "Real Estate", "Utilities",
    ]
# Create the working directories up front. exist_ok=True replaces the
# check-then-create sequence (which can race), and the parent directory of
# OUTPUT_FILE is created as well because the portfolio JSON is written there.
for _directory in (Config.STOCK_DATA_DIR, os.path.dirname(Config.OUTPUT_FILE)):
    if _directory:  # dirname() is "" for a bare file name; nothing to create
        os.makedirs(_directory, exist_ok=True)
+def fetch_stock_data(symbols: List[str]) -> Dict[str, pd.DataFrame | None]:
+    """Fetches stock data for multiple symbols from Yahoo Finance.
+    Args:
+        symbols (list): A list of stock symbols (e.g., ["RELIANCE.NS", "TCS.NS"]).
+    Returns:
+        dict: A dictionary where keys are stock symbols and values are pandas DataFrames or None if an error occurred.
+    """
+    stock_dataframes = {}
+    for symbol in symbols:
+        try:
+            ticker = yf.Ticker(symbol)
+            data = ticker.history(period="max")
+            if data.empty:
+                print(f"Warning: No data found for symbol '{symbol}'.")
+                stock_dataframes[symbol] = None
+                continue
+            stock_dataframes[symbol] = data
+        except Exception as e:
+            print(f"Error fetching data for symbol '{symbol}': {e}")
+            stock_dataframes[symbol] = None
+    return stock_dataframes
def store_stock_data(stock_dataframes: Dict[str, pd.DataFrame | None],
                     output_path: str = Config.STOCK_DATA_DIR) -> None:
    """Write every available DataFrame to "<symbol>_daily_data.csv".

    Args:
        stock_dataframes (dict): Maps stock symbols to DataFrames, or to None
            for symbols that have no data (these are skipped with a warning).
        output_path (str, optional): Directory the files are written to.
            Defaults to Config.STOCK_DATA_DIR. It is created when missing.
    """
    if output_path:
        try:
            # Only the default directory is created at import time; a
            # caller-supplied path may not exist yet.
            os.makedirs(output_path, exist_ok=True)
        except OSError as e:
            # Keep going: each write below reports its own failure.
            print(f"Error creating directory '{output_path}': {e}")

    for symbol, data in stock_dataframes.items():
        if data is None:
            print(f"Warning: No data available for '{symbol}', skipping storage.")
            continue
        file_path = os.path.join(output_path, f"{symbol}_daily_data.csv")
        try:
            data.to_csv(file_path)
            print(f"Info: Data for '{symbol}' saved to {file_path}")
        except Exception as e:
            # Best effort: one failing file must not stop the others.
            print(f"Error saving data for '{symbol}' to {file_path}: {e}")
def load_stock_data_and_extract_price(output_path_dir: str) -> Dict[str, Dict[str, float]]:
    """Read the stored CSV files and extract each symbol's latest closing price.

    Every file in ``output_path_dir`` named "<symbol>_daily_data.csv" is read.
    The price is the last non-missing value of the "Close" column, converted
    to a plain float.

    Args:
        output_path_dir (str): Directory containing the CSV files.

    Returns:
        dict: Maps each symbol to {"initial_price": <float>}. The price is 0.0
        when the file is empty, unreadable, has no "Close" column, or holds
        no usable closing value.
    """
    suffix = "_daily_data.csv"
    all_stock_data = {}
    # Sorted so the result order does not depend on the file system.
    for filename in sorted(os.listdir(output_path_dir)):
        if not filename.endswith(suffix):
            continue
        # Strip only the trailing suffix; str.replace would also remove the
        # same text if it appeared earlier in the file name.
        symbol = filename[:-len(suffix)]
        file_path = os.path.join(output_path_dir, filename)
        initial_price = 0.0  # default whenever no price can be extracted
        try:
            df = pd.read_csv(file_path, index_col=0)
            if df.empty:
                print(f"Warning: Empty dataframe for symbol '{symbol}'. Setting initial price to 0")
            else:
                # Skip missing closes so a trailing NaN row does not become
                # the price.
                closes = df["Close"].dropna()
                if closes.empty:
                    print(f"Warning: No closing price for symbol '{symbol}'. Setting initial price to 0")
                else:
                    initial_price = float(closes.iloc[-1])
        except (IndexError, KeyError, OSError, ValueError) as e:
            # ValueError covers pandas' EmptyDataError / ParserError (both
            # subclass it) as well as a non-numeric close value; OSError
            # covers a file that vanished or cannot be read.
            print(f"Error occurred for reading {symbol}, due to: {e}")
        all_stock_data[symbol] = {"initial_price": initial_price}
    return all_stock_data
def merge_stock_data_with_price(stock_data: Dict, extracted_data: Dict) -> Dict:
    """Combine the holdings with their extracted prices.

    Args:
        stock_data (dict): Maps an arbitrary key to a holding dictionary that
            contains at least "symbol" (plus e.g. name and quantity).
        extracted_data (dict): Maps a symbol to {"initial_price": <price>}.

    Returns:
        dict: A new dictionary with the same keys as ``stock_data``; each
        holding is a copy with "initial_price" added (0.0 when no price was
        extracted for its symbol). ``stock_data`` itself is left unchanged.

    Raises:
        KeyError: If a holding has no "symbol" entry.
    """
    merged_stock_data = {}
    for key, value in stock_data.items():
        price_entry = extracted_data.get(value["symbol"], {})
        # Build a fresh inner dict: a shallow copy of the outer dict would
        # still share, and therefore mutate, the caller's holdings.
        merged_stock_data[key] = {
            **value,
            "initial_price": price_entry.get("initial_price", 0.0),
        }
    return merged_stock_data
def generate_prompt(stock_data: Dict) -> str:
    """Build the LLM prompt that embeds the stock data as JSON.

    Args:
        stock_data (dict): Merged stock data (name, symbol, quantity and
            initial price per holding); it must be JSON-serializable.

    Returns:
        str: The prompt text with the JSON dump of ``stock_data`` inserted.
    """
    # Each line keeps the four-space indent of the prompt text, including the
    # whitespace-only last line.
    template_lines = [
        "",
        "    You are a financial analysis expert.",
        "    Please provide a summary of the following stock data, including the company name, stock symbol, and initial purchase price.",
        "    Stock Data:",
        "    {stock_data}",
        "    Summary:",
        "    ",
    ]
    template = "\n".join(template_lines)
    return template.format(stock_data=json.dumps(stock_data))
class Asset(BaseModel):
    """Represents an asset within a portfolio.

    The field descriptions are presumably forwarded to the language model
    through the structured-output schema (TODO confirm), so they are written
    as instructions to the model.
    """
    quantity: int = Field(..., description="The number of shares or units held for this specific asset.")
    initial_price: float = Field(..., description="The initial purchase price per share or unit of this asset.")
    # The sector text must steer the model to the exact names in
    # Config.SECTORS: the simulation step matches sector names by string
    # equality, so a free-form label such as 'consumer' never matches.
    sector: str = Field(..., description=(
        "The economic sector of the asset, based on the stock symbol or company name. "
        f"Use exactly one of these sector names: {', '.join(Config.SECTORS)}. "
        "For example, 'Financials' for HDFC or JPM, 'Consumer Staples' for PG, "
        "'Information Technology' for TCS or INFY. This categorization should be done "
        "based on the business nature of the company whose stock is traded. For instance, "
        "if the stock symbol is 'HDFCBANK', the sector is expected to be 'Financials'."
    ))
class Portfolio(BaseModel):
    """Represents an individual portfolio: a name plus its assets by ticker."""
    name: str = Field(...,
                      description="The name given to this portfolio, for example 'Diversified Portfolio' or 'Aggressive Tech Portfolio'.")
    # The example uses sector names from the supported sector list
    # ('Financials', 'Consumer Staples') so the model is not shown labels that
    # the sector matching in the simulation step would never recognise.
    assets: Dict[str, Asset] = Field(..., description="""A dictionary containing the assets within this portfolio. The keys of the dictionary
                                   are the ticker symbols of the stocks (e.g., 'JPM', 'PG'), and the values are the corresponding
                                   'Asset' objects, which define the quantity, initial price, and sector for each asset.
                                   Example: {'JPM': {'quantity': 150, 'initial_price': 140, 'sector': 'Financials'},
                                            'PG': {'quantity': 200, 'initial_price': 160, 'sector': 'Consumer Staples'}}"""
                                     )
def invoke_llm_for_portfolio(formatted_prompt: str) -> Portfolio:
    """Ask the Groq-hosted model for a Portfolio-shaped answer.

    Args:
        formatted_prompt (str): Prompt text sent to the model.

    Returns:
        Portfolio: The structured output produced for the prompt.

    Raises:
        Exception: Any error raised during invocation is printed and then
            re-raised unchanged (including pydantic's ValidationError).
    """
    structured_llm = ChatGroq(
        groq_api_key=Config.GROQ_API_KEY,
        model_name="llama-3.1-8b-instant",
    ).with_structured_output(Portfolio)

    try:
        return structured_llm.invoke(formatted_prompt)
    except Exception as e:
        # Validation failures get their own message; everything else is
        # reported as unexpected. Both propagate to the caller.
        if isinstance(e, ValidationError):
            print(f"Error during LLM invocation: {e}")
        else:
            print(f"Unexpected error during LLM invocation {e}")
        raise
def portfolio_to_json(portfolio: Portfolio, output_file: str = Config.OUTPUT_FILE) -> None:
    """Serialize a Portfolio to indented JSON and write it to a file.

    Args:
        portfolio (Portfolio): The portfolio to serialize.
        output_file (str, optional): Destination path. Defaults to
            Config.OUTPUT_FILE. Its parent directory is created when missing.

    Failures are reported on stdout and not raised (best effort).
    """
    try:
        json_str = portfolio.model_dump_json(indent=4)
        parent_dir = os.path.dirname(output_file)
        if parent_dir:
            # Without this the write fails on a fresh checkout where the
            # output directory does not exist yet.
            os.makedirs(parent_dir, exist_ok=True)
        with open(output_file, "w", encoding="utf-8") as f:
            f.write(json_str)
        print(f"Info: Portfolio saved to '{output_file}'")
    except Exception as e:
        print(f"Error saving JSON file {e}")
if __name__ == '__main__':
    # End-to-end run: download prices, persist them, build the prompt, ask
    # the LLM for a structured portfolio and save it as JSON.

    # Sample stock data
    stock_data = {
        "stock1": {"name": "Reliance Industries Ltd.", "symbol": "RELIANCE.NS", "quantity": 10},
        "stock2": {"name": "Tata Consultancy Services Ltd.", "symbol": "TCS.NS", "quantity": 15},
        "stock3": {"name": "HDFC Bank Ltd.", "symbol": "HDFCBANK.NS", "quantity": 20},
        "stock4": {"name": "Infosys Ltd.", "symbol": "INFY.NS", "quantity": 12},
        "stock5": {"name": "Hindustan Unilever Ltd.", "symbol": "HINDUNILVR.NS", "quantity": 8}
    }
    # 1. Fetch stock data
    stock_symbols = [value["symbol"] for value in stock_data.values()]
    stock_dfs = fetch_stock_data(stock_symbols)
    # Keep only the symbols for which a DataFrame was actually returned
    saved_dataframes = {}
    if stock_dfs:
        for symbol, df in stock_dfs.items():
            if df is not None:
                saved_dataframes[symbol] = df
                print(f"Data for '{symbol}' loaded into variable.")
            else:
                print(f"No data found for '{symbol}'")
    else:
        print("Error occurred during fetching data. DataFrames are not returned.")

    def save_dataframes(dataframes_dict, filename="output_files/saved_dataframes.pkl"):
        """Pickle the symbol -> DataFrame dictionary to ``filename``."""
        parent_dir = os.path.dirname(filename)
        if parent_dir:
            # The output directory is not guaranteed to exist; without this
            # the open() below raises FileNotFoundError and aborts the run.
            os.makedirs(parent_dir, exist_ok=True)
        with open(filename, 'wb') as file:
            pickle.dump(dataframes_dict, file)
        print(f"DataFrames successfully saved to {filename}.")

    save_dataframes(saved_dataframes)
    # 2. Store data
    store_stock_data(stock_dfs)
    # 3. Load the last price
    extracted_data = load_stock_data_and_extract_price(Config.STOCK_DATA_DIR)
    # 4. Merge extracted price with the main dictionary
    merged_stock_data = merge_stock_data_with_price(stock_data, extracted_data)
    # 5. Generate prompt for LLM
    formatted_prompt = generate_prompt(merged_stock_data)
    print(formatted_prompt)
    # 6. Invoke LLM
    try:
        portfolio_output = invoke_llm_for_portfolio(formatted_prompt)
        print(portfolio_output)
    except Exception as e:
        print(f"An unexpected error occurred during the LLM invocation: {e}")
    else:
        # 7. Save portfolio output to JSON (only when the LLM call succeeded)
        portfolio_to_json(portfolio_output)

scenario.py ADDED Viewed

	@@ -0,0 +1,145 @@

+# Import required modules
+import requests
+from bs4 import BeautifulSoup
+import nest_asyncio
+import asyncio
+import json
+import re
+from crawl4ai import *
+import os
+from dotenv import load_dotenv
+import google.generativeai as genai
# Read settings from a local .env file; it is expected to contain a line of
# the form GOOGLE_API_KEY=<your_api_key>.
load_dotenv()
# None when the variable is not defined.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
# nest_asyncio lets asynchronous tasks run in Jupyter/interactive environments.
nest_asyncio.apply()
async def extract_text_from_website(url):
    """Crawl ``url`` with AsyncWebCrawler and return the result's markdown.

    Args:
        url: Address of the page to crawl.

    Returns:
        The ``markdown`` attribute of the crawl result.
    """
    async with AsyncWebCrawler() as crawler:
        crawl_result = await crawler.arun(url=url)
        # Read the attribute while the crawler is still open, then return.
        page_markdown = crawl_result.markdown
    return page_markdown
# Configure the generative AI client with the key read from the environment,
# then build the model and open the chat session used by the script below.
genai.configure(api_key=GOOGLE_API_KEY)
generation_config = dict(
    temperature=1,
    top_p=0.95,
    top_k=40,
    max_output_tokens=8192,
    response_mime_type="text/plain",
)
model = genai.GenerativeModel(
    model_name="gemini-2.0-flash-exp",
    generation_config=generation_config,
)
chat_session = model.start_chat()
def get_response(llm, prompt):
    """Send ``prompt`` through ``llm.send_message`` and return its result.

    Args:
        llm: Any object exposing ``send_message(prompt)`` (the script passes
            its chat session).
        prompt: The message to send.

    Returns:
        Whatever ``send_message`` returns, unmodified.
    """
    return llm.send_message(prompt)
def extract_json_content(text):
    """Extract the JSON payload from a model response.

    Tried in order:
      1. the first fenced block tagged ``json`` (tag is case-insensitive;
         trailing spaces and CRLF line endings after the tag are accepted),
      2. the first fenced block without a language tag,
      3. the span from the first "{" to the last "}" when no closed fence is
         present (bare JSON, or a fence the model never closed).

    Args:
        text: The raw response text; None or an empty string is accepted.

    Returns:
        The extracted text with surrounding whitespace removed, or None when
        nothing JSON-like is found. The result is not parsed or validated.
    """
    if not text:
        return None

    fence_patterns = (
        r"```json[ \t]*\r?\n(.*?)```",
        r"```[ \t]*\r?\n(.*?)```",
    )
    for pattern in fence_patterns:
        match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
        if match:
            return match.group(1).strip()

    # No closed fence: fall back to the outermost brace-delimited span.
    start = text.find("{")
    end = text.rfind("}")
    if 0 <= start < end:
        return text[start:end + 1].strip()
    return None
if __name__ == "__main__":
    # Crawl a market-news page, ask the model for market scenarios in JSON
    # form and save the parsed result to output_files/scenario.json.

    # Extract market data from the given URL
    url = "https://www.livemint.com/market/stock-market-news/page-7"
    context_data = asyncio.run(extract_text_from_website(url))
    sectors = [
        "Communication Services",
        "Consumer Discretionary",
        "Consumer Staples",
        "Energy",
        "Financials",
        "Health Care",
        "Industrials",
        "Information Technology",
        "Materials",
        "Real Estate",
        "Utilities",
    ]
    # The example block is closed with its own ``` fence so the model is
    # shown a complete fenced JSON answer, which is the format the
    # extraction step looks for.
    prompt = f"""
    # TASK: Analyze market context and identify potential market scenarios.
    # CONTEXT:
    {context_data}
    # END CONTEXT
    # INSTRUCTION: Based on the provided market context, analyze and identify up to three plausible market scenarios.
    # For each scenario, determine its name (e.g., "Moderate Downturn"), the general market direction ("up" or "down"), a major trigger point that could cause the scenario to unfold, and a list of sectors that would be significantly impacted. Each 'sector_impact' list should have less than or equal to 4 sectors.
    # OUTPUT FORMAT: Provide the analysis in JSON format with the following structure.
    # Use the sector names provided:
    {sectors}
    # EXAMPLE:
    ```json
    {{
        "market_scenarios": {{
            "scenario1": {{
                "name": "Moderate Downturn",
                "direction": "down",
                "trigger": "Interest rate hike",
                "sector_impact": [
                    "Financials",
                    "Energy"
                ]
            }},
            "scenario2": {{
                "name": "Bullish Growth",
                "direction": "up",
                "trigger": "Successful vaccine rollout",
                "sector_impact": [
                    "Health Care",
                    "Information Technology"
                ]
            }}
        }}
    }}
    ```
    """
    # Generate the response
    answer = get_response(chat_session, prompt)
    # Extract the JSON output from the response
    json_output = extract_json_content(answer.text)
    # Define output file path
    output_file = "output_files/scenario.json"
    if json_output is None:
        # Nothing was extracted; json.loads(None) would only surface as an
        # opaque TypeError message, so report the real problem instead.
        print("Error: Could not decode the output from the model into JSON format.")
    else:
        # Parse the output into a JSON object and save it to a file
        try:
            analysis_json = json.loads(json_output)
            output_dir = os.path.dirname(output_file)
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)  # Ensure the output directory exists
            with open(output_file, "w", encoding="utf-8") as f:
                json.dump(analysis_json, f, indent=4)  # Save JSON to a file with indentation
            print(f"Analysis saved to '{output_file}'")
        except json.JSONDecodeError:
            print("Error: Could not decode the output from the model into JSON format.")
        except Exception as e:
            print(f"Error: {e}")

simluation_data.py ADDED Viewed

	@@ -0,0 +1,172 @@

+import json
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+import pickle
+# Function for Monte Carlo Simulation
def monte_carlo_simulation(portfolio_data, scenario_data, num_simulations=10000):
    """
    Performs a Monte Carlo simulation on a portfolio based on market scenarios.

    For every scenario and every simulation run, each asset whose sector is
    listed in the scenario's "sector_impact" receives a random relative price
    change drawn from a normal distribution with standard deviation 0.1 and
    mean ``magnitude / 100``. When the scenario's "direction" is "down" the
    mean is ``-abs(magnitude) / 100``, so a downturn actually lowers prices.
    Assets in other sectors keep their initial price.

    Args:
        portfolio_data (dict): Must contain "assets", mapping asset names to
            dicts with "sector", "quantity" and "initial_price".
        scenario_data (dict): Must contain "market_scenarios", mapping keys to
            dicts with "name" and optionally "direction" and "sector_impact".
            "sector_impact" is normally a dict sector -> magnitude; a plain
            list of sector names is accepted and treated as magnitude 0.
        num_simulations (int, optional): The number of simulations. Defaults to 10000.

    Returns:
        dict: Keyed by scenario name. Each value holds "portfolio_values"
        (list of floats), "average_return", "std_dev_return" and
        "percentiles" (5/25/50/75/95 of the returns). Returns are 0.0 when
        the initial portfolio value is 0; the statistics stay at their
        empty defaults when no simulation was run.
    """
    scenarios = scenario_data["market_scenarios"]
    assets = list(portfolio_data["assets"].values())
    # Invariant across scenarios, so computed once.
    initial_portfolio_value = sum(
        asset["quantity"] * asset["initial_price"] for asset in assets
    )
    results = {}
    for scenario_details in scenarios.values():
        scenario_name = scenario_details["name"]
        sector_impacts = scenario_details.get("sector_impact") or {}
        if not isinstance(sector_impacts, dict):
            # List form (sector names only): impacted, but no magnitude known.
            sector_impacts = dict.fromkeys(sector_impacts, 0.0)
        direction = str(scenario_details.get("direction", "")).strip().lower()
        is_downturn = direction == "down"

        portfolio_values = []
        for _ in range(num_simulations):
            portfolio_value = 0.0
            for asset_details in assets:
                sector = asset_details["sector"]
                price_change_percentage = 0.0
                if sector in sector_impacts:
                    mean_change = sector_impacts[sector] / 100
                    if is_downturn:
                        # Magnitudes are stored unsigned; the direction
                        # supplies the sign.
                        mean_change = -abs(mean_change)
                    price_change_percentage = np.random.normal(
                        loc=mean_change, scale=0.1
                    )
                # Calculate the new price
                new_price = asset_details["initial_price"] * (1 + price_change_percentage)
                portfolio_value += new_price * asset_details["quantity"]
            portfolio_values.append(float(portfolio_value))

        scenario_result = {
            "portfolio_values": portfolio_values,
            "average_return": 0,
            "std_dev_return": 0,
            "percentiles": {},
        }
        results[scenario_name] = scenario_result
        if not portfolio_values:
            # np.percentile cannot handle an empty sample.
            continue

        if initial_portfolio_value:
            returns = [
                (value - initial_portfolio_value) / initial_portfolio_value
                for value in portfolio_values
            ]
        else:
            # A relative return is undefined for a zero-valued portfolio.
            returns = [0.0] * len(portfolio_values)

        # Plain floats keep the result free of numpy scalar types.
        scenario_result["average_return"] = float(np.mean(returns))
        scenario_result["std_dev_return"] = float(np.std(returns))
        scenario_result["percentiles"] = {
            q: float(np.percentile(returns, q)) for q in (5, 25, 50, 75, 95)
        }
    return results
if __name__ == "__main__":
    # Derive per-sector magnitudes for each scenario from the saved price
    # history, write the updated scenario file, then run and report the
    # Monte Carlo simulation.

    # Load input data
    with open("output_files/scenario.json", encoding="utf-8") as f:
        scenario_data = json.load(f)
    with open("output_files/portfolio.json", encoding="utf-8") as f:
        portfolio_data = json.load(f)

    def load_dataframes(filename="output_files/saved_dataframes.pkl"):
        """Load the pickled symbol -> DataFrame dictionary, or None if absent."""
        try:
            with open(filename, 'rb') as file:
                # NOTE(security): pickle.load executes code embedded in the
                # file; only load a pickle produced by this project.
                saved_dataframes = pickle.load(file)
                print(f"DataFrames successfully loaded from {filename}.")
                return saved_dataframes
        except FileNotFoundError:
            print(f"File {filename} not found.")
            return None

    # Fall back to an empty mapping so a missing pickle degrades to
    # "no magnitudes" instead of failing on None[...] below.
    saved_dataframes = load_dataframes() or {}
    # Placeholder for storing results
    scenario_results = {}
    # Process each scenario (keyed by its id in the scenario file)
    for scenario_id, scenario_details in scenario_data["market_scenarios"].items():
        impacted_sectors = scenario_details["sector_impact"]
        # Filter assets in the impacted sectors
        relevant_assets = [
            symbol
            for symbol, details in portfolio_data["assets"].items()
            if details["sector"] in impacted_sectors
        ]
        # Calculate magnitudes for the scenario
        sector_magnitudes = {}
        for symbol in relevant_assets:
            df = saved_dataframes.get(symbol)
            if df is None or len(df) < 2:
                # The portfolio's asset keys may not match the saved symbols,
                # and two rows are needed for a difference.
                print(f"Warning: No usable price history for '{symbol}', skipping.")
                continue
            sector = portfolio_data["assets"][symbol]["sector"]
            # Magnitude is the absolute difference between the last two Close
            # prices.
            # NOTE(review): this is an absolute price difference, while
            # monte_carlo_simulation divides the value by 100 as if it were a
            # percentage - confirm the intended unit.
            magnitude = abs(df["Close"].iloc[-2] - df["Close"].iloc[-1])
            # Aggregate by sector
            sector_magnitudes[sector] = sector_magnitudes.get(sector, 0) + magnitude
        # Calculate aggregated magnitude for the scenario
        aggregated_magnitude = sum(sector_magnitudes.values())
        # Store results
        scenario_results[scenario_id] = {
            "individual_magnitudes": sector_magnitudes,
            "aggregated_magnitude": aggregated_magnitude,
        }
    # Display results
    for scenario_id, results in scenario_results.items():
        print(f"\nScenario: {scenario_id}")
        print("Individual Sector Magnitudes:")
        for sector, magnitude in results["individual_magnitudes"].items():
            print(f"  {sector}: {magnitude:.2f}")
        print(f"Aggregated Magnitude: {results['aggregated_magnitude']:.2f}")
    # Integrate calculated results into scenario data
    for scenario_id, results in scenario_results.items():
        # Replace the list of sector names with the sector -> magnitude mapping
        scenario_data["market_scenarios"][scenario_id]["sector_impact"] = results["individual_magnitudes"]
        # Update aggregated magnitude
        scenario_data["market_scenarios"][scenario_id]["aggregated_magnitude"] = results["aggregated_magnitude"]
    # Save the updated scenario data to a local JSON file
    output_file_path = "output_files/updated_scenario_data.json"
    with open(output_file_path, "w", encoding="utf-8") as file:
        json.dump(scenario_data, file, indent=4)
    print(f"Updated scenario data saved to '{output_file_path}' successfully!")
    # Run Monte Carlo simulation
    simulation_results = monte_carlo_simulation(portfolio_data, scenario_data)
    # Save simulation results to a local JSON file
    simulation_results_file = "output_files/simulation_results.json"
    with open(simulation_results_file, "w", encoding="utf-8") as file:
        json.dump(simulation_results, file, indent=4)
    print(f"Simulation results saved to '{simulation_results_file}' successfully!")
    # Print simulation results
    for scenario_name, results in simulation_results.items():
        print(f"Scenario: {scenario_name}")
        print(f"  Average Return: {results['average_return']:.4f}")
        print(f"  Std Dev Return: {results['std_dev_return']:.4f}")
        print("  Return Percentiles:")
        for percentile, value in results["percentiles"].items():
            print(f"    {percentile}th: {value:.4f}")
        print("-" * 40)