# Hugging Face Spaces app. A previous revision hit a runtime error; that
# failing version is preserved, commented out, in the block below.
| # import streamlit as st | |
| # import pandas as pd | |
| # import numpy as np | |
| # import torch | |
| # from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForCausalLM | |
| # from peft import PeftModel | |
| # from sklearn.linear_model import LinearRegression | |
| # from sklearn.model_selection import train_test_split | |
| # from sklearn.metrics import mean_squared_error, r2_score | |
| # import matplotlib.pyplot as plt | |
| # # Set page configuration | |
| # st.set_page_config(layout="wide", page_title="FinGPT Investment Predictor") | |
| # # --- Part 1: Financial Sentiment Analysis Model Loading and Function --- | |
| # # Use st.cache_resource to load heavy models only once | |
| # @st.cache_resource | |
| # def load_sentiment_model(sentiment_model_name="FinGPT/fingpt-sentiment_llama2-13b_lora", | |
| # base_tokenizer_name="meta-llama/Llama-2-13b-chat-hf"): # Added base_tokenizer_name | |
| # """ | |
| # Loads the pre-trained sentiment analysis model and tokenizer. | |
| # Uses st.cache_resource to prevent reloading on every Streamlit rerun. | |
| # """ | |
| # st.write(f"Loading sentiment tokenizer from base model: {base_tokenizer_name}...") | |
| # tokenizer = AutoTokenizer.from_pretrained(base_tokenizer_name) # Load tokenizer from base Llama model | |
| # st.write(f"Loading sentiment model: {sentiment_model_name}...") | |
| # # Load the sentiment model (which is the LoRA adapter) | |
| # model = AutoModelForSequenceClassification.from_pretrained(sentiment_model_name) | |
| # model.eval() # Set model to evaluation mode | |
| # device = torch.device("cuda" if torch.cuda.is_available() else "cpu") | |
| # model.to(device) | |
| # st.write(f"Sentiment model loaded. Using device: {device}") | |
| # return tokenizer, model, device | |
| # tokenizer, sentiment_model, device = load_sentiment_model() | |
| # def get_sentiment_score_and_label(text): | |
| # """ | |
| # Analyzes the sentiment of the given text using the loaded model. | |
| # Returns a numerical score (-1 to 1) and a categorical label (negative/neutral/positive). | |
| # """ | |
| # inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device) | |
| # with torch.no_grad(): | |
| # outputs = sentiment_model(**inputs) | |
| # logits = outputs.logits | |
| # probabilities = torch.softmax(logits, dim=1).cpu().numpy()[0] | |
| # # Assuming the FinGPT sentiment model outputs logits in the order: [negative, neutral, positive]. | |
| # neg_score = probabilities[0] | |
| # neu_score = probabilities[1] | |
| # pos_score = probabilities[2] | |
| # # A simple weighted average to get a single sentiment score between -1 and 1 | |
| # sentiment_score = (pos_score * 1) + (neg_score * -1) + (neu_score * 0) | |
| # labels = ["negative", "neutral", "positive"] | |
| # predicted_class_id = logits.argmax().item() | |
| # predicted_label = labels[predicted_class_id] | |
| # return sentiment_score, predicted_label | |
| # # --- Part 2: Simulate Financial Data and Train Prediction Model --- | |
| # # Use st.cache_data to run data generation and model training only once | |
| # @st.cache_data | |
| # def prepare_data_and_train_model(): | |
| # """ | |
| # Simulates historical financial data, calculates sentiment, | |
| # and trains a simple linear regression model. | |
| # """ | |
| # st.write("Simulating financial data and training prediction model...") | |
| # # Let's create some dummy historical data for a stock and news headlines | |
| # dates = pd.to_datetime(pd.date_range(start='2024-01-01', periods=60, freq='D')) # Increased period for better visualization | |
| # np.random.seed(42) # for reproducibility | |
| # # Simulate stock prices with some trend and noise | |
| # base_price = 100 | |
| # prices = [base_price + np.random.uniform(-2, 2)] | |
| # for _ in range(1, len(dates)): | |
| # change = np.random.uniform(-1, 1) + (np.random.uniform(-0.5, 0.5) * np.random.choice([-1, 1], p=[0.2, 0.8])) | |
| # prices.append(prices[-1] + change) | |
| # prices = np.array(prices) + np.cumsum(np.random.uniform(-0.1, 0.3, len(dates))) | |
| # # Simulate financial news headlines for each day | |
| # dummy_news = [ | |
| # "Tech company reports strong Q4 earnings, stock up.", | |
| # "Market shows signs of recovery after recent dip.", | |
| # "Government announces new regulations affecting energy sector.", | |
| # "Company X faces legal challenges, shares fall.", | |
| # "Positive economic indicators boost investor confidence.", | |
| # "Global supply chain issues continue to impact manufacturing.", | |
| # "Innovation in AI drives new growth opportunities for company Z.", | |
| # "Analyst downgrades stock, citing valuation concerns.", | |
| # "Central bank holds interest rates steady, as expected.", | |
| # "Surprise acquisition announced, boosting stock of target company.", | |
| # "Quarterly sales below expectations, cautious outlook given.", | |
| # "New product launch receives mixed reviews.", | |
| # "Commodity prices stabilizing after volatile period.", | |
| # "Major competitor announces expansion plans.", | |
| # "Strong consumer spending data released.", | |
| # "Company Y CEO resigns amidst controversy.", | |
| # "Healthcare sector sees increased M&A activity.", | |
| # "Inflation concerns persist, impacting consumer sentiment.", | |
| # "Renewable energy stocks gain traction.", | |
| # "Geopolitical tensions rise, market volatility increases.", | |
| # "Positive clinical trial results for biotech firm.", | |
| # "Earnings report beats estimates, stock rallies.", | |
| # "Product recall announced, shares decline sharply.", | |
| # "New trade agreement expected to benefit exporters.", | |
| # "Tech giant invests heavily in R&D, future growth anticipated.", | |
| # "Retail sales unexpectedly weak last month.", | |
| # "Financial institution expands services, positive outlook.", | |
| # "Mining company faces environmental penalties.", | |
| # "Market sentiment remains cautiously optimistic.", | |
| # "Dividend increase announced, attracting income investors.", | |
| # "Breakthrough in medical research boosts pharma stock.", | |
| # "New energy policy to impact utility companies.", | |
| # "Cybersecurity firm reports major data breach.", | |
| # "E-commerce sales exceed forecasts during holiday season.", | |
| # "Automotive industry faces chip shortage challenges.", | |
| # "Biotech startup secures significant funding.", | |
| # "Real estate market shows signs of cooling.", | |
| # "Investment bank upgrades rating for tech stock.", | |
| # "Consumer confidence index reaches new high.", | |
| # "Airline industry recovers strongly post-pandemic.", | |
| # "Retailer announces store closures, stock drops.", | |
| # "Software company acquires competitor, market reacts positively.", | |
| # "Global trade tensions ease, positive for exporters.", | |
| # "New environmental regulations impact manufacturing costs.", | |
| # "Tourism sector sees strong rebound, hotel stocks rise.", | |
| # "Bank reports solid profits despite economic headwinds.", | |
| # "Construction firm wins major government contract.", | |
| # "Food prices continue to rise, affecting grocery chains.", | |
| # "Luxury brand sales surge in emerging markets.", | |
| # "Telecommunications giant invests in 5G infrastructure." | |
| # ] | |
| # # Ensure we have enough news for all dates | |
| # if len(dummy_news) < len(dates): | |
| # dummy_news_extended = (dummy_news * (len(dates) // len(dummy_news) + 1))[:len(dates)] | |
| # else: | |
| # dummy_news_extended = dummy_news[:len(dates)] | |
| # # Calculate sentiment scores for each day's news | |
| # sentiment_scores = [get_sentiment_score_and_label(news)[0] for news in dummy_news_extended] | |
| # # Create a DataFrame | |
| # data = pd.DataFrame({ | |
| # 'Date': dates, | |
| # 'Price': prices, | |
| # 'Sentiment': sentiment_scores, | |
| # 'News': dummy_news_extended | |
| # }) | |
| # data.set_index('Date', inplace=True) | |
| # # Add lagged price and sentiment as features | |
| # data['Previous_Day_Price'] = data['Price'].shift(1) | |
| # data['Previous_Day_Sentiment'] = data['Sentiment'].shift(1) | |
| # # Drop the first row which will have NaN due to shifting | |
| # data.dropna(inplace=True) | |
| # # We want to predict 'Price' based on 'Previous_Day_Price' and 'Previous_Day_Sentiment' | |
| # X = data[['Previous_Day_Price', 'Previous_Day_Sentiment']] | |
| # y = data['Price'] | |
| # # Split data into training and testing sets (chronologically) | |
| # test_size_ratio = 0.2 | |
| # split_index = int(len(data) * (1 - test_size_ratio)) | |
| # X_train, X_test = X.iloc[:split_index], X.iloc[split_index:] | |
| # y_train, y_test = y.iloc[:split_index], y.iloc[split_index:] | |
| # # Initialize and train the Linear Regression model | |
| # model_prediction = LinearRegression() | |
| # model_prediction.fit(X_train, y_train) | |
| # # Make predictions on the test set | |
| # y_pred = model_prediction.predict(X_test) | |
| # # Evaluate the model | |
| # mse = mean_squared_error(y_test, y_pred) | |
| # r2 = r2_score(y_test, y_pred) | |
| # return data, X_train, X_test, y_train, y_test, y_pred, model_prediction, mse, r2 | |
| # # Run the data preparation and model training | |
| # data, X_train, X_test, y_train, y_test, y_pred, model_prediction, mse, r2 = prepare_data_and_train_model() | |
| # # --- Part 3: LLM Forecaster Model Loading and Function (Conceptual) --- | |
| # # Load the base Llama-2-7b-chat model (required by the LoRA adapter) | |
| # @st.cache_resource | |
| # def load_base_llm_model(base_model_name="meta-llama/Llama-2-7b-chat-hf"): | |
| # """ | |
| # Loads the base Large Language Model required for the FinGPT forecaster. | |
| # """ | |
| # st.write(f"Loading base LLM model: {base_model_name}...") | |
| # base_tokenizer = AutoTokenizer.from_pretrained(base_model_name) | |
| # base_model = AutoModelForCausalLM.from_pretrained( | |
| # base_model_name, | |
| # torch_dtype=torch.float16, # Use float16 for memory efficiency | |
| # device_map="auto" # Automatically maps model to available devices (GPU if available) | |
| # ) | |
| # return base_tokenizer, base_model | |
| # base_tokenizer, base_llm_model = load_base_llm_model() | |
| # @st.cache_resource | |
| # def load_forecaster_model(forecaster_model_name="FinGPT/fingpt-forecaster_dow30_llama2-7b_lora"): | |
| # """ | |
| # Loads the FinGPT forecaster LoRA adapter and merges it with the base LLM. | |
| # """ | |
| # st.write(f"Loading FinGPT forecaster model: {forecaster_model_name}...") | |
| # # Load the LoRA adapter | |
| # model = PeftModel.from_pretrained(base_llm_model, forecaster_model_name) | |
| # model = model.eval() | |
| # st.write(f"FinGPT forecaster model loaded.") | |
| # return model | |
| # forecaster_llm_model = load_forecaster_model() | |
| # def get_llm_forecast(ticker, current_date, news_summary, current_price): | |
| # """ | |
| # Generates a text-based forecast using the FinGPT forecaster LLM. | |
| # This is a conceptual demonstration of LLM forecasting. | |
| # """ | |
| # # Construct a prompt for the LLM forecaster | |
| # # This prompt structure is simplified; real FinGPT forecasters might expect more complex inputs | |
| # # based on their training data (e.g., historical data, financial statements). | |
| # prompt = f""" | |
| # You are a financial expert. | |
| # Analyze the following information and provide a concise prediction for the stock price movement of {ticker} for the next week. | |
| # Current Date: {current_date.strftime('%Y-%m-%d')} | |
| # Current Price of {ticker}: ${current_price:.2f} | |
| # Recent News: "{news_summary}" | |
| # Based on this information, what is your prediction for {ticker}'s stock price movement next week? Provide a brief analysis. | |
| # """ | |
| # # For Llama-2-chat models, typically prompts are wrapped with specific tokens | |
| # # This is a common format for instruction-tuned Llama models. | |
| # chat_template = f"<s>[INST] {prompt} [/INST]" | |
| # inputs = base_tokenizer(chat_template, return_tensors="pt").to(device) | |
| # # Generate response from the LLM | |
| # with torch.no_grad(): | |
| # outputs = forecaster_llm_model.generate( | |
| # **inputs, | |
| # max_new_tokens=200, # Limit the length of the generated response | |
| # num_return_sequences=1, | |
| # do_sample=True, | |
| # top_k=50, | |
| # top_p=0.95, | |
| # temperature=0.7, | |
| # ) | |
| # # Decode the generated text, removing the prompt part | |
| # response_text = base_tokenizer.decode(outputs[0], skip_special_tokens=True) | |
| # # The response will include the prompt. We need to find the actual generated text. | |
| # # This parsing might need to be more robust depending on the model's exact output format. | |
| # generated_forecast = response_text.split("[/INST]")[-1].strip() | |
| # return generated_forecast | |
| # # --- Streamlit UI Layout --- | |
| # st.title("FinGPT-Powered Investment Predictor 📈") | |
| # st.markdown(""" | |
| # This application demonstrates how financial news sentiment can be integrated with historical price data | |
| # to make simple investment predictions. It uses a pre-trained FinBERT-like model for sentiment analysis | |
| # and a Linear Regression model for price prediction. | |
| # **Disclaimer:** This is a simplified demonstration for educational purposes only. | |
| # It should **NOT** be used for actual investment decisions. Stock market prediction is highly complex | |
| # and involves many more factors and sophisticated models. | |
| # """) | |
| # # Create tabs for better organization | |
| # tab1, tab2, tab3, tab4 = st.tabs(["Sentiment Analyzer", "Historical Data & Model Performance", "Predict Tomorrow's Price (Linear Regression)", "LLM Forecaster (Conceptual)"]) | |
| # with tab1: | |
| # st.header("Financial News Sentiment Analyzer") | |
| # st.write("Enter a financial news headline or text to get its sentiment.") | |
| # news_input = st.text_area("News Text:", height=150, placeholder="e.g., 'Apple's stock surged after reporting record-breaking earnings.'") | |
| # if st.button("Analyze Sentiment"): | |
| # if news_input: | |
| # sentiment_score, sentiment_label = get_sentiment_score_and_label(news_input) | |
| # st.markdown(f"**Sentiment Score:** `{sentiment_score:.3f}` (closer to 1 is positive, -1 is negative)") | |
| # st.markdown(f"**Sentiment Label:** `{sentiment_label.upper()}`") | |
| # else: | |
| # st.warning("Please enter some text to analyze sentiment.") | |
| # with tab2: | |
| # st.header("Simulated Historical Data and Model Performance") | |
| # st.write("Here's a look at the simulated historical data used and how the prediction model performed on the test set.") | |
| # st.subheader("Sample Historical Data") | |
| # st.dataframe(data.head()) | |
| # st.subheader("Prediction Model Performance") | |
| # st.write(f"**Mean Squared Error (MSE):** `{mse:.2f}`") | |
| # st.write(f"**R-squared (R2):** `{r2:.2f}`") | |
| # st.info("A lower MSE indicates better prediction accuracy, and an R2 closer to 1 indicates that the model explains more of the variance in the target variable.") | |
| # st.subheader("Actual vs. Predicted Prices (Test Set)") | |
| # fig, ax = plt.subplots(figsize=(12, 6)) | |
| # ax.plot(y_test.index, y_test, label='Actual Price', marker='o', linestyle='-', markersize=4) | |
| # ax.plot(y_test.index, y_pred, label='Predicted Price', marker='x', linestyle='--', markersize=4) | |
| # ax.set_title('Stock Price Prediction with Sentiment (Simulated Data)') | |
| # ax.set_xlabel('Date') | |
| # ax.set_ylabel('Price') | |
| # ax.legend() | |
| # ax.grid(True) | |
| # st.pyplot(fig) | |
| # with tab3: | |
| # st.header("Predict Tomorrow's Price (Linear Regression)") | |
| # st.write("Enter today's closing price and relevant news to get a conceptual prediction for tomorrow using a Linear Regression model.") | |
| # last_known_price_simulated = data['Price'].iloc[-1] | |
| # col1, col2 = st.columns(2) | |
| # with col1: | |
| # today_closing_price = st.number_input( | |
| # "Today's Closing Price:", | |
| # min_value=0.0, | |
| # value=float(f"{last_known_price_simulated:.2f}"), | |
| # step=0.1, | |
| # help="Based on the last simulated price from the historical data." | |
| # ) | |
| # with col2: | |
| # today_news_headline = st.text_area( | |
| # "Today's Financial News Headline:", | |
| # value="Market shows strong upward momentum, positive outlook for tech sector.", | |
| # height=100 | |
| # ) | |
| # if st.button("Predict Price (Linear Regression)"): | |
| # if today_closing_price is not None and today_news_headline: | |
| # latest_sentiment_score, latest_sentiment_label = get_sentiment_score_and_label(today_news_headline) | |
| # st.write(f"**Analyzed Sentiment for Today's News:** `{latest_sentiment_label.upper()}` (Score: `{latest_sentiment_score:.3f}`)") | |
| # # Prepare data for prediction | |
| # new_data_for_prediction = pd.DataFrame({ | |
| # 'Previous_Day_Price': [today_closing_price], | |
| # 'Previous_Day_Sentiment': [latest_sentiment_score] | |
| # }) | |
| # # Make prediction | |
| # tomorrow_predicted_price = model_prediction.predict(new_data_for_prediction)[0] | |
| # st.success(f"**Conceptual Prediction for Tomorrow's Price:** `${tomorrow_predicted_price:.2f}`") | |
| # st.info("Remember, this is a conceptual prediction based on a simplified model and simulated data.") | |
| # else: | |
| # st.warning("Please enter both today's closing price and news headline to make a prediction.") | |
| # with tab4: | |
| # st.header("LLM Forecaster (Conceptual)") | |
| # st.write("This section demonstrates how a FinGPT forecaster LLM *could* generate a text-based forecast.") | |
| # st.warning("Note: This model requires significant memory (GPU recommended) and its output is text-based analysis, not a precise numerical prediction like the Linear Regression model.") | |
| # llm_ticker = st.text_input("Stock Ticker (e.g., AAPL, MSFT):", value="AAPL") | |
| # llm_current_price = st.number_input("Current Price:", min_value=0.0, value=175.0, step=0.1) | |
| # llm_news_summary = st.text_area("Recent News Summary:", value="Apple announced strong Q4 earnings, beating analyst expectations and showing robust iPhone sales in emerging markets.", height=100) | |
| # llm_current_date = st.date_input("Current Date:", value=pd.to_datetime('2024-07-15')) | |
| # if st.button("Get LLM Forecast"): | |
| # if llm_ticker and llm_current_price is not None and llm_news_summary and llm_current_date: | |
| # with st.spinner("Generating LLM forecast... This may take a moment."): | |
| # forecast_text = get_llm_forecast(llm_ticker, llm_current_date, llm_news_summary, llm_current_price) | |
| # st.subheader("LLM's Forecast and Analysis:") | |
| # st.write(forecast_text) | |
| # else: | |
| # st.warning("Please fill in all fields to get an LLM forecast.") | |
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| import torch | |
| # from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForCausalLM | |
| # from peft import PeftModel | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForCausalLM | |
| from peft import PeftModel, LoraConfig, get_peft_model # Ensure PeftModel is imported | |
| from sklearn.linear_model import LinearRegression | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.metrics import mean_squared_error, r2_score | |
| import matplotlib.pyplot as plt | |
# Configure the Streamlit page; must run before any other st.* call.
st.set_page_config(page_title="FinGPT Investment Predictor", layout="wide")
# --- Part 1: Financial Sentiment Analysis Model Loading and Function ---
@st.cache_resource  # heavy model: load once per process, not on every Streamlit rerun
def load_sentiment_model(sentiment_lora_name="FinGPT/fingpt-sentiment_llama2-13b_lora",
                         base_model_name="meta-llama/Llama-2-13b-chat-hf"):
    """Load the FinGPT sentiment model (Llama-2 base + LoRA adapter) and tokenizer.

    Args:
        sentiment_lora_name: Hub id of the FinGPT sentiment LoRA adapter.
        base_model_name: Hub id of the Llama-2 base model the adapter targets.

    Returns:
        (tokenizer, sentiment_model, device): base-model tokenizer, the
        PEFT-wrapped model in eval mode, and the torch device its first
        parameters were placed on.
    """
    st.write(f"Loading base tokenizer from: {base_model_name}...")
    tokenizer = AutoTokenizer.from_pretrained(base_model_name)
    # Llama-2 tokenizers ship without a pad token; padding=True needs one.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    st.write(f"Loading base model for sentiment: {base_model_name}...")
    # Load the base Llama 2 model as a causal LM.
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        torch_dtype=torch.float16,  # halve memory vs float32
        device_map="auto",          # accelerate places layers on GPU/CPU as available
    )
    st.write(f"Loading sentiment LoRA adapter: {sentiment_lora_name}...")
    # Attach the LoRA adapter on top of the base model.
    sentiment_model = PeftModel.from_pretrained(base_model, sentiment_lora_name)
    sentiment_model.eval()
    # BUG FIX: device_map="auto" has already dispatched the weights; calling
    # .to(device) on a dispatched model raises in accelerate. Report the device
    # the parameters actually landed on instead of moving the model.
    device = next(sentiment_model.parameters()).device
    st.write(f"Sentiment model loaded. Using device: {device}")
    return tokenizer, sentiment_model, device


tokenizer, sentiment_model, device = load_sentiment_model()
def get_sentiment_score_and_label(text):
    """
    Analyzes the sentiment of the given text using the loaded model.
    Returns a numerical score (-1 to 1) and a categorical label (negative/neutral/positive).
    """
    # NOTE(review): `sentiment_model` is a PeftModel over AutoModelForCausalLM,
    # whose logits are shaped (batch, seq_len, vocab_size), not (batch, 3).
    # The 3-class indexing below assumes a sequence-classification head —
    # confirm against the actual model; this mismatch is a likely source of
    # the app's runtime error.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device)
    with torch.no_grad():  # inference only — no gradient bookkeeping
        outputs = sentiment_model(**inputs)
    logits = outputs.logits
    # Softmax over dim=1; [0] selects the single batch item.
    probabilities = torch.softmax(logits, dim=1).cpu().numpy()[0]
    # Assuming the FinGPT sentiment model outputs logits in the order: [negative, neutral, positive].
    neg_score = probabilities[0]
    neu_score = probabilities[1]
    pos_score = probabilities[2]
    # A simple weighted average to get a single sentiment score between -1 and 1
    sentiment_score = (pos_score * 1) + (neg_score * -1) + (neu_score * 0)
    labels = ["negative", "neutral", "positive"]
    # Highest-logit class determines the categorical label.
    predicted_class_id = logits.argmax().item()
    predicted_label = labels[predicted_class_id]
    return sentiment_score, predicted_label
# --- Part 2: Simulate Financial Data and Train Prediction Model ---
@st.cache_data  # deterministic simulation + training: run once, cache the result
def prepare_data_and_train_model():
    """
    Simulates historical financial data, calculates sentiment,
    and trains a simple linear regression model.

    Returns:
        (data, X_train, X_test, y_train, y_test, y_pred, model_prediction, mse, r2):
        the full DataFrame (indexed by Date), the chronological train/test
        splits, test-set predictions, the fitted LinearRegression, and its
        MSE / R² on the test set.
    """
    st.write("Simulating financial data and training prediction model...")
    # Dummy historical data for a stock and daily news headlines.
    dates = pd.to_datetime(pd.date_range(start='2024-01-01', periods=60, freq='D'))  # 60 days for better visualization
    np.random.seed(42)  # for reproducibility
    # Random walk with a mild upward drift to mimic a stock price series.
    base_price = 100
    prices = [base_price + np.random.uniform(-2, 2)]
    for _ in range(1, len(dates)):
        change = np.random.uniform(-1, 1) + (np.random.uniform(-0.5, 0.5) * np.random.choice([-1, 1], p=[0.2, 0.8]))
        prices.append(prices[-1] + change)
    prices = np.array(prices) + np.cumsum(np.random.uniform(-0.1, 0.3, len(dates)))
    # Simulated financial news headlines, one per trading day.
    dummy_news = [
        "Tech company reports strong Q4 earnings, stock up.",
        "Market shows signs of recovery after recent dip.",
        "Government announces new regulations affecting energy sector.",
        "Company X faces legal challenges, shares fall.",
        "Positive economic indicators boost investor confidence.",
        "Global supply chain issues continue to impact manufacturing.",
        "Innovation in AI drives new growth opportunities for company Z.",
        "Analyst downgrades stock, citing valuation concerns.",
        "Central bank holds interest rates steady, as expected.",
        "Surprise acquisition announced, boosting stock of target company.",
        "Quarterly sales below expectations, cautious outlook given.",
        "New product launch receives mixed reviews.",
        "Commodity prices stabilizing after volatile period.",
        "Major competitor announces expansion plans.",
        "Strong consumer spending data released.",
        "Company Y CEO resigns amidst controversy.",
        "Healthcare sector sees increased M&A activity.",
        "Inflation concerns persist, impacting consumer sentiment.",
        "Renewable energy stocks gain traction.",
        "Geopolitical tensions rise, market volatility increases.",
        "Positive clinical trial results for biotech firm.",
        "Earnings report beats estimates, stock rallies.",
        "Product recall announced, shares decline sharply.",
        "New trade agreement expected to benefit exporters.",
        "Tech giant invests heavily in R&D, future growth anticipated.",
        "Retail sales unexpectedly weak last month.",
        "Financial institution expands services, positive outlook.",
        "Mining company faces environmental penalties.",
        "Market sentiment remains cautiously optimistic.",
        "Dividend increase announced, attracting income investors.",
        "Breakthrough in medical research boosts pharma stock.",
        "New energy policy to impact utility companies.",
        "Cybersecurity firm reports major data breach.",
        "E-commerce sales exceed forecasts during holiday season.",
        "Automotive industry faces chip shortage challenges.",
        "Biotech startup secures significant funding.",
        "Real estate market shows signs of cooling.",
        "Investment bank upgrades rating for tech stock.",
        "Consumer confidence index reaches new high.",
        "Airline industry recovers strongly post-pandemic.",
        "Retailer announces store closures, stock drops.",
        "Software company acquires competitor, market reacts positively.",
        "Global trade tensions ease, positive for exporters.",
        "New environmental regulations impact manufacturing costs.",
        "Tourism sector sees strong rebound, hotel stocks rise.",
        "Bank reports solid profits despite economic headwinds.",
        "Construction firm wins major government contract.",
        "Food prices continue to rise, affecting grocery chains.",
        "Luxury brand sales surge in emerging markets.",
        "Telecommunications giant invests in 5G infrastructure."
    ]
    # Cycle the headline list if there are more dates than headlines.
    if len(dummy_news) < len(dates):
        dummy_news_extended = (dummy_news * (len(dates) // len(dummy_news) + 1))[:len(dates)]
    else:
        dummy_news_extended = dummy_news[:len(dates)]
    # Score each day's headline with the sentiment model (score only, drop label).
    sentiment_scores = [get_sentiment_score_and_label(news)[0] for news in dummy_news_extended]
    data = pd.DataFrame({
        'Date': dates,
        'Price': prices,
        'Sentiment': sentiment_scores,
        'News': dummy_news_extended
    })
    data.set_index('Date', inplace=True)
    # Lagged features: predict today's price from yesterday's price/sentiment.
    data['Previous_Day_Price'] = data['Price'].shift(1)
    data['Previous_Day_Sentiment'] = data['Sentiment'].shift(1)
    # First row has NaN lags after shifting.
    data.dropna(inplace=True)
    X = data[['Previous_Day_Price', 'Previous_Day_Sentiment']]
    y = data['Price']
    # Chronological split (no shuffling) to avoid look-ahead leakage.
    test_size_ratio = 0.2
    split_index = int(len(data) * (1 - test_size_ratio))
    X_train, X_test = X.iloc[:split_index], X.iloc[split_index:]
    y_train, y_test = y.iloc[:split_index], y.iloc[split_index:]
    # Fit the linear model and evaluate on the held-out tail.
    model_prediction = LinearRegression()
    model_prediction.fit(X_train, y_train)
    y_pred = model_prediction.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    return data, X_train, X_test, y_train, y_test, y_pred, model_prediction, mse, r2


# Run the data preparation and model training once (cached thereafter).
data, X_train, X_test, y_train, y_test, y_pred, model_prediction, mse, r2 = prepare_data_and_train_model()
# --- Part 3: LLM Forecaster Model Loading and Function (Conceptual) ---
@st.cache_resource  # base LLM is large: load once per process
def load_base_llm_model(base_model_name="meta-llama/Llama-2-7b-chat-hf"):
    """
    Loads the base Large Language Model required for the FinGPT forecaster.

    Args:
        base_model_name: Hub id of the Llama-2 chat base model.

    Returns:
        (base_tokenizer, base_model): tokenizer and the causal-LM base model.
    """
    st.write(f"Loading base LLM model: {base_model_name}...")
    base_tokenizer = AutoTokenizer.from_pretrained(base_model_name)
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        torch_dtype=torch.float16,  # float16 for memory efficiency
        device_map="auto",          # place layers on available devices (GPU if present)
    )
    return base_tokenizer, base_model


base_tokenizer, base_llm_model = load_base_llm_model()
@st.cache_resource  # LoRA adapter load is expensive: cache across Streamlit reruns
def load_forecaster_model(forecaster_model_name="FinGPT/fingpt-forecaster_dow30_llama2-7b_lora"):
    """
    Loads the FinGPT forecaster LoRA adapter on top of the base LLM.

    Args:
        forecaster_model_name: Hub id of the forecaster LoRA adapter.

    Returns:
        The PEFT-wrapped forecaster model in eval mode.
    """
    st.write(f"Loading FinGPT forecaster model: {forecaster_model_name}...")
    # Attach the LoRA adapter to the globally loaded base model.
    model = PeftModel.from_pretrained(base_llm_model, forecaster_model_name)
    model = model.eval()
    st.write("FinGPT forecaster model loaded.")
    return model


forecaster_llm_model = load_forecaster_model()
def get_llm_forecast(ticker, current_date, news_summary, current_price):
    """
    Generates a text-based forecast using the FinGPT forecaster LLM.
    This is a conceptual demonstration of LLM forecasting.

    Args:
        ticker: Stock symbol to forecast (e.g. "AAPL").
        current_date: date/datetime stamped into the prompt via strftime.
        news_summary: Free-text recent-news summary fed to the model.
        current_price: Latest price, interpolated into the prompt.

    Returns:
        The model's generated analysis with the prompt portion stripped.
    """
    # Simplified prompt; real FinGPT forecasters may expect richer inputs
    # (historical data, financial statements, etc.).
    prompt = f"""
You are a financial expert.
Analyze the following information and provide a concise prediction for the stock price movement of {ticker} for the next week.
Current Date: {current_date.strftime('%Y-%m-%d')}
Current Price of {ticker}: ${current_price:.2f}
Recent News: "{news_summary}"
Based on this information, what is your prediction for {ticker}'s stock price movement next week? Provide a brief analysis.
"""
    # Llama-2-chat instruction-tuned prompt format.
    chat_template = f"<s>[INST] {prompt} [/INST]"
    # BUG FIX: input tensors must live on the *forecaster* model's device. The
    # previous code used the global `device` chosen for the sentiment model,
    # which can differ from where device_map="auto" placed this 7B model.
    inputs = base_tokenizer(chat_template, return_tensors="pt").to(forecaster_llm_model.device)
    with torch.no_grad():
        outputs = forecaster_llm_model.generate(
            **inputs,
            max_new_tokens=200,  # bound the length of the generated response
            num_return_sequences=1,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            temperature=0.7,
        )
    # Decode, then strip everything up to the closing instruction tag; this
    # parsing may need hardening depending on the model's exact output format.
    response_text = base_tokenizer.decode(outputs[0], skip_special_tokens=True)
    generated_forecast = response_text.split("[/INST]")[-1].strip()
    return generated_forecast
# --- Streamlit UI Layout ---
# Top-of-page title for the app.
st.title("FinGPT-Powered Investment Predictor 📈")
# Introductory copy plus an explicit educational-use disclaimer, rendered as markdown.
st.markdown("""
This application demonstrates how financial news sentiment can be integrated with historical price data
to make simple investment predictions. It uses a pre-trained FinBERT-like model for sentiment analysis
and a Linear Regression model for price prediction.
**Disclaimer:** This is a simplified demonstration for educational purposes only.
It should **NOT** be used for actual investment decisions. Stock market prediction is highly complex
and involves many more factors and sophisticated models.
""")
# Create tabs for better organization
# Four tabs: sentiment demo, model diagnostics, numeric price prediction,
# and a conceptual text-based LLM forecast. Bodies are defined below.
tab1, tab2, tab3, tab4 = st.tabs(["Sentiment Analyzer", "Historical Data & Model Performance", "Predict Tomorrow's Price (Linear Regression)", "LLM Forecaster (Conceptual)"])
with tab1:
    # Sentiment demo: free-text input -> score/label from the sentiment model.
    st.header("Financial News Sentiment Analyzer")
    st.write("Enter a financial news headline or text to get its sentiment.")
    news_input = st.text_area("News Text:", height=150, placeholder="e.g., 'Apple's stock surged after reporting record-breaking earnings.'")
    if st.button("Analyze Sentiment"):
        # Guard first: nothing to analyze without input text.
        if not news_input:
            st.warning("Please enter some text to analyze sentiment.")
        else:
            sentiment_score, sentiment_label = get_sentiment_score_and_label(news_input)
            # Show both the raw score and the discrete label.
            st.markdown(f"**Sentiment Score:** `{sentiment_score:.3f}` (closer to 1 is positive, -1 is negative)")
            st.markdown(f"**Sentiment Label:** `{sentiment_label.upper()}`")
with tab2:
    # Diagnostics tab: preview of the simulated data, regression metrics,
    # and an actual-vs-predicted chart over the held-out test set.
    st.header("Simulated Historical Data and Model Performance")
    st.write("Here's a look at the simulated historical data used and how the prediction model performed on the test set.")
    st.subheader("Sample Historical Data")
    st.dataframe(data.head())
    st.subheader("Prediction Model Performance")
    st.write(f"**Mean Squared Error (MSE):** `{mse:.2f}`")
    st.write(f"**R-squared (R2):** `{r2:.2f}`")
    st.info("A lower MSE indicates better prediction accuracy, and an R2 closer to 1 indicates that the model explains more of the variance in the target variable.")
    st.subheader("Actual vs. Predicted Prices (Test Set)")
    fig, ax = plt.subplots(figsize=(12, 6))
    # Both series share the test-set date index; draw actual first, then predicted.
    for series, label, marker, style in (
        (y_test, 'Actual Price', 'o', '-'),
        (y_pred, 'Predicted Price', 'x', '--'),
    ):
        ax.plot(y_test.index, series, label=label, marker=marker, linestyle=style, markersize=4)
    ax.set(
        title='Stock Price Prediction with Sentiment (Simulated Data)',
        xlabel='Date',
        ylabel='Price',
    )
    ax.legend()
    ax.grid(True)
    st.pyplot(fig)
with tab3:
    # Numeric prediction tab: combine today's close and the sentiment of
    # today's news into a one-row feature frame and score it with the
    # pre-fitted linear regression (`model_prediction`).
    st.header("Predict Tomorrow's Price (Linear Regression)")
    st.write("Enter today's closing price and relevant news to get a conceptual prediction for tomorrow using a Linear Regression model.")
    # Seed the price widget with the most recent simulated close.
    last_known_price_simulated = data['Price'].iloc[-1]
    col1, col2 = st.columns(2)
    with col1:
        today_closing_price = st.number_input(
            "Today's Closing Price:",
            min_value=0.0,
            # Round directly instead of round-tripping through a formatted string.
            value=round(float(last_known_price_simulated), 2),
            step=0.1,
            help="Based on the last simulated price from the historical data."
        )
    with col2:
        today_news_headline = st.text_area(
            "Today's Financial News Headline:",
            value="Market shows strong upward momentum, positive outlook for tech sector.",
            height=100
        )
    if st.button("Predict Price (Linear Regression)"):
        # number_input always yields a float here, but keep the defensive
        # check so an empty headline still triggers the warning path.
        if today_closing_price is not None and today_news_headline:
            latest_sentiment_score, latest_sentiment_label = get_sentiment_score_and_label(today_news_headline)
            st.write(f"**Analyzed Sentiment for Today's News:** `{latest_sentiment_label.upper()}` (Score: `{latest_sentiment_score:.3f}`)")
            # Prepare data for prediction: column names must match the
            # features the regression was trained on.
            new_data_for_prediction = pd.DataFrame({
                'Previous_Day_Price': [today_closing_price],
                'Previous_Day_Sentiment': [latest_sentiment_score]
            })
            # Make prediction (predict returns an array; take the scalar).
            tomorrow_predicted_price = model_prediction.predict(new_data_for_prediction)[0]
            st.success(f"**Conceptual Prediction for Tomorrow's Price:** `${tomorrow_predicted_price:.2f}`")
            st.info("Remember, this is a conceptual prediction based on a simplified model and simulated data.")
        else:
            st.warning("Please enter both today's closing price and news headline to make a prediction.")
with tab4:
    # Conceptual LLM tab: collect ticker/price/news/date and ask the
    # forecaster LLM for a free-text analysis (no numeric output).
    st.header("LLM Forecaster (Conceptual)")
    st.write("This section demonstrates how a FinGPT forecaster LLM *could* generate a text-based forecast.")
    st.warning("Note: This model requires significant memory (GPU recommended) and its output is text-based analysis, not a precise numerical prediction like the Linear Regression model.")
    llm_ticker = st.text_input("Stock Ticker (e.g., AAPL, MSFT):", value="AAPL")
    llm_current_price = st.number_input("Current Price:", min_value=0.0, value=175.0, step=0.1)
    llm_news_summary = st.text_area("Recent News Summary:", value="Apple announced strong Q4 earnings, beating analyst expectations and showing robust iPhone sales in emerging markets.", height=100)
    llm_current_date = st.date_input("Current Date:", value=pd.to_datetime('2024-07-15'))
    if st.button("Get LLM Forecast"):
        # Guard clause: warn if any field is missing/empty.
        if not llm_ticker or llm_current_price is None or not llm_news_summary or not llm_current_date:
            st.warning("Please fill in all fields to get an LLM forecast.")
        else:
            with st.spinner("Generating LLM forecast... This may take a moment."):
                forecast_text = get_llm_forecast(llm_ticker, llm_current_date, llm_news_summary, llm_current_price)
            st.subheader("LLM's Forecast and Analysis:")
            st.write(forecast_text)