import streamlit as st
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import re
from datetime import datetime
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# One-time NLTK resource fetch: VADER's lexicon is required by
# SentimentIntensityAnalyzer; download() is a no-op if already cached.
nltk.download('vader_lexicon')
# Page-wide Streamlit configuration must run before any other st.* output.
st.set_page_config(page_title="Daily News with Sentiment Analysis", layout="wide")
st.title('Daily News with Sentiment Analysis')
# Sidebar inputs
st.sidebar.title('Input Parameters')
# Collapsed help panel explaining the workflow.
with st.sidebar.expander("How to use:", expanded=False):
    st.markdown("""
1. **Enter Ticker**: Type the stock symbol (e.g., 'AAPL').
2. **Fetch News**: Click 'Get News' for the latest articles.
3. **View Sentiment**: See articles with sentiment scores (-1 to 1).
4. **Sort & Download**: Download as CSV if needed.
""")
# Wrapping the stock ticker input in an expander
with st.sidebar.expander("Stock Ticker Input", expanded=True):
    # Free-text ticker symbol; defaults to Apple.
    ticker = st.text_input('Enter Stock Ticker', 'AAPL')
def get_finviz_news_raw(ticker):
    """Scrape the news table for *ticker* from finviz.com.

    Returns a DataFrame with columns Datetime/Source/Title/Link, or
    None when the request fails or no news table is present (an error
    is surfaced to the user via st.error in both cases).
    """
    finviz_url = f'https://finviz.com/quote.ashx?t={ticker}'
    # Finviz rejects requests without a browser-like User-Agent.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'}
    try:
        # timeout so a stalled connection cannot hang the Streamlit app;
        # RequestException covers DNS/connection/timeout failures that
        # previously crashed the script with a traceback.
        response = requests.get(finviz_url, headers=headers, timeout=10)
    except requests.RequestException:
        st.error(f"Error fetching data for {ticker}")
        return None
    if response.status_code != 200:
        st.error(f"Error fetching data for {ticker}")
        return None
    soup = BeautifulSoup(response.text, "html.parser")
    news_table = soup.find("table", class_="fullview-news-outer")
    if not news_table:
        st.error(f"No news found for {ticker}")
        return None
    news_rows = news_table.find_all("tr")
    raw_data = []
    for row in news_rows:
        # Each row: right-aligned timestamp cell, source cell, headline anchor.
        date_time_cell = row.find("td", align="right")
        news_source_cell = row.find("td", class_="news-link-right")
        news_title_cell = row.find("a", class_="tab-link-news")
        # Cells can be missing on malformed rows; fall back to ''.
        date_time_str = date_time_cell.get_text().strip() if date_time_cell else ''
        news_source = news_source_cell.get_text() if news_source_cell else ''
        news_title = news_title_cell.get_text().strip() if news_title_cell else ''
        news_link = news_title_cell['href'] if news_title_cell else ''
        raw_data.append({
            "Datetime": date_time_str,
            "Source": news_source,
            "Title": news_title,
            "Link": news_link
        })
    df = pd.DataFrame(raw_data)
    return df
# Preprocess text for sentiment analysis.
# Patterns are compiled once at import: preprocess_text runs once per
# headline via DataFrame.apply, so recompiling per call was wasted work.
_HTML_TAG_RE = re.compile(r'<.*?>')        # HTML tags
_NON_ALPHA_RE = re.compile(r'[^a-zA-Z\s]') # special characters and digits
_WHITESPACE_RE = re.compile(r'\s+')        # runs of whitespace

def preprocess_text(text):
    """Normalize headline text for VADER scoring.

    Strips HTML tags, removes everything but ASCII letters and
    whitespace, then collapses whitespace runs to single spaces.
    """
    text = _HTML_TAG_RE.sub('', text)
    text = _NON_ALPHA_RE.sub('', text)
    return _WHITESPACE_RE.sub(' ', text).strip()
# Analyze sentiment using VADER
def analyze_sentiment(text):
    """Return VADER's compound polarity score for *text* in [-1, 1].

    The SentimentIntensityAnalyzer is built once and memoised on the
    function object: the original constructed a fresh analyzer (which
    re-reads the lexicon) for every headline passed via DataFrame.apply.
    """
    sia = getattr(analyze_sentiment, '_sia', None)
    if sia is None:
        sia = SentimentIntensityAnalyzer()
        analyze_sentiment._sia = sia  # cache for all subsequent calls
    return sia.polarity_scores(text)['compound']
# Fetch and display news when the button is pressed.
if st.sidebar.button('Get News'):
    news_df_raw = get_finviz_news_raw(ticker)
    if news_df_raw is not None and not news_df_raw.empty:
        df = news_df_raw.copy()
        # Get today's date (finviz labels same-day rows as 'Today').
        today_date = datetime.today().strftime('%Y-%m-%d')
        # Finviz only prints the date on the first headline of each day;
        # carry the last seen date forward for time-only rows.
        current_date = None
        dates = []
        times = []
        for index, row in df.iterrows():
            datetime_value = row['Datetime']
            # Replace 'Today' with today's date.
            if 'Today' in datetime_value:
                datetime_value = datetime_value.replace('Today', today_date)
            # A cell with a space whose first token is not purely digits
            # carries both a date and a time (e.g. 'Dec-27-24 09:00AM').
            if ' ' in datetime_value and not datetime_value.split(' ')[0].isdigit():
                current_date = datetime_value.split(' ')[0]
                time_value = ' '.join(datetime_value.split(' ')[1:])
            else:
                # Only a time is provided; reuse the carried-forward date.
                time_value = datetime_value
            dates.append(current_date)
            times.append(time_value)
        df['Date'] = dates
        df['Time'] = times
        # Score each headline on cleaned text; drop the scratch column.
        df['Cleaned_Title'] = df['Title'].apply(preprocess_text)
        df['Sentiment'] = df['Cleaned_Title'].apply(analyze_sentiment)
        df.drop(columns=['Cleaned_Title', 'Datetime', 'Source'], inplace=True)
        df = df.reset_index(drop=True)
        # Build the CSV *before* the HTML conversion below so the download
        # keeps plain-text titles (no embedded anchor tags).
        csv = df.drop(columns=['Link']).to_csv(index=False).encode('utf-8')
        # BUG FIX: the original lambda was f'{x["Title"]}', an identity copy,
        # so no link was ever rendered despite to_html(escape=False) and
        # unsafe_allow_html=True. Wrap the title in an anchor tag instead.
        df['Title'] = df.apply(
            lambda x: f'<a href="{x["Link"]}" target="_blank">{x["Title"]}</a>'
            if x["Link"] else x["Title"],
            axis=1,
        )
        df.drop(columns=['Link'], inplace=True)
        st.write(f"## News for {ticker}")
        # escape=False so the anchor tags render as clickable links.
        st.markdown(df.to_html(escape=False, index=False), unsafe_allow_html=True)
        # Option to download data as CSV.
        st.download_button(
            label="Download data as CSV",
            data=csv,
            file_name=f'{ticker}_news_sentiment.csv',
            mime='text/csv',
        )
    else:
        st.write(f"No news found for {ticker}")
# Placeholder for custom CSS that typically hides Streamlit chrome
# (hamburger menu / footer). The string is currently empty, so this
# markdown call renders nothing.
hide_streamlit_style = """
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)