szeandlink/Project_Testing

Sleeping

App Files Community

Szeyu committed on May 15, 2025

Commit

78c1fff

verified ·

1 Parent(s): e7f6be9

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -153

app.py CHANGED Viewed

@@ -1,161 +1,62 @@
 import streamlit as st
-import pandas as pd
-from sklearn.model_selection import train_test_split
-from datasets import Dataset
-from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, get_linear_schedule_with_warmup
-import numpy as np
-import torch
 from transformers import pipeline
-from collections import Counter
-import time
-from tqdm import tqdm
-import evaluate
-# Function to load and process data
-def load_and_process_data(news_file, trend_file):
-    news_df = pd.read_csv(news_file)
-    trend_df = pd.read_csv(trend_file)
-    trend_df = trend_df.rename(columns={'Symbol': 'Stock'})
-    news_labeled_df = news_df.merge(trend_df[['Stock', 'Trend']], on='Stock', how='left')
-    news_labeled_df = news_labeled_df[news_labeled_df['Trend'].isin(['Positive', 'Negative'])]
-    label_map = {'Negative': 0, 'Positive': 1}
-    news_labeled_df['label'] = news_labeled_df['Trend'].map(label_map)
-    return news_labeled_df
-# Function to check class imbalance
-def check_class_imbalance(df):
-    class_counts = df['label'].value_counts()
-    st.write("**Class Distribution:**", class_counts.to_dict())
-    if class_counts.min() / class_counts.max() < 0.5:
-        st.warning("Warning: Class imbalance detected. Consider balancing techniques.")
-# Function to split data
-def split_data(df):
-    stocks = df['Stock'].unique()
-    train_val_stocks, test_stocks = train_test_split(stocks, test_size=0.2, random_state=42)
-    train_stocks, val_stocks = train_test_split(train_val_stocks, test_size=0.25, random_state=42)
-    train_df = df[df['Stock'].isin(train_stocks)]
-    val_df = df[df['Stock'].isin(val_stocks)]
-    test_df = df[df['Stock'].isin(test_stocks)]
-    return train_df, val_df, test_df
-# Function to tokenize datasets
-def tokenize_datasets(train_df, val_df, test_df, tokenizer):
-    train_dataset = Dataset.from_pandas(train_df[['Headline', 'label']])
-    val_dataset = Dataset.from_pandas(val_df[['Headline', 'label']])
-    test_dataset = Dataset.from_pandas(test_df[['Headline', 'label']])
-    def tokenize_function(examples):
-        return tokenizer(examples['Headline'], padding='max_length', truncation=True, max_length=128)
-    tokenized_train = train_dataset.map(tokenize_function, batched=True)
-    tokenized_val = val_dataset.map(tokenize_function, batched=True)
-    tokenized_test = test_dataset.map(tokenize_function, batched=True)
-    return tokenized_train, tokenized_val, tokenized_test
-# Function to load model with caching
-@st.cache_resource
-def load_model():
-    model = AutoModelForSequenceClassification.from_pretrained(
-        "yiyanghkust/finbert-tone",
-        num_labels=2,
-        ignore_mismatched_sizes=True
-    )
-    for param in model.bert.encoder.layer[:6].parameters():
-        param.requires_grad = False
-    return model
-# Function to train model
-def train_model(tokenized_train, tokenized_val, model):
-    training_args = TrainingArguments(
-        output_dir="./results",
-        num_train_epochs=5,
-        per_device_train_batch_size=32,
-        per_device_eval_batch_size=32,
-        eval_strategy="epoch",
-        save_strategy="epoch",
-        load_best_model_at_end=True,
-        metric_for_best_model="accuracy",
-        learning_rate=5e-5,
-        weight_decay=0.1,
-        report_to="none",
-    )
-    total_steps = len(tokenized_train) // training_args.per_device_train_batch_size * training_args.num_train_epochs
-    optimizer = torch.optim.AdamW(model.parameters(), lr=training_args.learning_rate)
-    trainer = Trainer(
-        model=model,
-        args=training_args,
-        train_dataset=tokenized_train,
-        eval_dataset=tokenized_val,
-        compute_metrics=lambda eval_pred: {"accuracy": evaluate.load("accuracy").compute(predictions=np.argmax(eval_pred.predictions, axis=1), references=eval_pred.label_ids)},
-        optimizers=(optimizer, get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)),
-    )
-    trainer.train()
-    trainer.save_model("./fine_tuned_model")
-    return trainer
-# Function to evaluate model
-def evaluate_model(pipe, df, model_name=""):
-    results = []
-    total_start = time.perf_counter()
-    for stock, group in tqdm(df.groupby("Stock")):
-        headlines = group["Headline"].tolist()
-        true_trend = group["Trend"].iloc[0]
-        try:
-            preds = pipe(headlines, truncation=True)
-        except Exception as e:
-            st.error(f"Error for {stock}: {e}")
-            continue
-        labels = [p['label'] for p in preds]
-        count = Counter(labels)
-        num_pos, num_neg = count.get("Positive", 0), count.get("Negative", 0)
-        predicted_trend = "Positive" if num_pos > num_neg else "Negative"
-        match = predicted_trend == true_trend
-        results.append(match)
-    total_runtime = time.perf_counter() - total_start
-    accuracy = sum(results) / len(results) if results else 0
-    st.write(f"**🔍 Evaluation Summary for {model_name}**")
-    st.write(f"✅ Accuracy: {accuracy:.2%}")
-    st.write(f"⏱ Total Runtime: {total_runtime:.2f} seconds")
-    return accuracy
 # Streamlit UI
-st.title("Financial Sentiment Analysis with FinBERT")
-st.markdown("Upload your CSV files to train and evaluate a sentiment analysis model on financial news headlines.")
-st.header("Upload CSV Files")
-news_file = st.file_uploader("Upload Train_stock_news.csv", type="csv")
-trend_file = st.file_uploader("Upload Training_price_comparison.csv", type="csv")
-if news_file and trend_file:
-    with st.spinner("Processing data..."):
-        df = load_and_process_data(news_file, trend_file)
-        check_class_imbalance(df)
-        train_df, val_df, test_df = split_data(df)
-        st.write(f"**Training stocks:** {len(train_df['Stock'].unique())}")
-        st.write(f"**Validation stocks:** {len(val_df['Stock'].unique())}")
-        st.write(f"**Test stocks:** {len(test_df['Stock'].unique())}")
-        tokenizer = AutoTokenizer.from_pretrained("yiyanghkust/finbert-tone")
-        tokenized_train, tokenized_val, tokenized_test = tokenize_datasets(train_df, val_df, test_df, tokenizer)
-        model = load_model()
-        with st.spinner("Training model..."):
-            trainer = train_model(tokenized_train, tokenized_val, model)
-        st.success("Model training completed!")
-        # Evaluate original model
-        original_pipe = pipeline("text-classification", model="yiyanghkust/finbert-tone")
-        st.write("Evaluating original model...")
-        original_accuracy = evaluate_model(original_pipe, test_df, model_name="Original Model")
-        # Evaluate fine-tuned model
-        fine_tuned_pipe = pipeline("text-classification", model="./fine_tuned_model")
-        st.write("Evaluating fine-tuned model...")
-        fine_tuned_accuracy = evaluate_model(fine_tuned_pipe, test_df, model_name="Fine-tuned Model")
-        st.write(f"**Comparison:**")
-        st.write(f"Original Model Accuracy: {original_accuracy:.2%}")
-        st.write(f"Fine-tuned Model Accuracy: {fine_tuned_accuracy:.2%}")
-else:
-    st.warning("Please upload both CSV files to proceed.")

 import streamlit as st
+import requests
+from bs4 import BeautifulSoup
 from transformers import pipeline
+# Initialize sentiment analysis pipeline
+sentiment_pipeline = pipeline("sentiment-analysis")
+# Function to fetch top 3 news articles from FinViz
+def fetch_news(ticker):
+    try:
+        url = f"https://finviz.com/quote.ashx?t={ticker}"
+        headers = {'User-Agent': 'Mozilla/5.0'}
+        response = requests.get(url, headers=headers)
+        soup = BeautifulSoup(response.text, 'html.parser')
+        news_table = soup.find(id='news-table')
+        news = []
+        for row in news_table.findAll('tr')[:3]:  # Limit to top 3
+            title = row.a.get_text()
+            link = row.a['href']
+            news.append({'title': title, 'link': link})
+        return news
+    except Exception as e:
+        st.error(f"Failed to fetch news for {ticker}: {e}")
+        return []
+# Function to analyze sentiment of news title
+def analyze_sentiment(text):
+    try:
+        result = sentiment_pipeline(text)[0]
+        return "Positive" if result['label'] == 'POSITIVE' else "Negative"
+    except Exception as e:
+        st.error(f"Sentiment analysis failed: {e}")
+        return "Unknown"
 # Streamlit UI
+st.title("Stock News Sentiment Analysis")
+# Input field for stock tickers
+tickers_input = st.text_input("Enter five stock tickers separated by commas (e.g., AAPL, MSFT, GOOGL, AMZN, TSLA):")
+if st.button("Get News and Sentiment"):
+    if tickers_input:
+        tickers = [ticker.strip().upper() for ticker in tickers_input.split(',')]
+        # Validate input
+        if len(tickers) != 5:
+            st.error("Please enter exactly five stock tickers.")
+        else:
+            # Process each ticker
+            for ticker in tickers:
+                st.subheader(f"Top 3 News Articles for {ticker}")
+                news_list = fetch_news(ticker)
+                if news_list:
+                    for i, news in enumerate(news_list, 1):
+                        sentiment = analyze_sentiment(news['title'])
+                        st.markdown(f"{i}. [{news['title']}]({news['link']}) - **{sentiment}**")
+                else:
+                    st.write("No news available for this ticker.")
+    else:
+        st.warning("Please enter stock tickers.")