import pandas as pd
import numpy as np

# Load the stock data for TSLA and AAPL
tsla_data = pd.read_csv('TSLA_stock_data.csv')
aapl_data = pd.read_csv('AAPL_stock_data.csv')

# Display the first few rows to understand the structure
print("TSLA data columns:", tsla_data.columns)
print("AAPL data columns:", aapl_data.columns)

# Function to preprocess stock data
def preprocess_stock_data(stock_data):
    # Convert the date column to datetime
    stock_data['Date'] = pd.to_datetime(stock_data['Date'], errors='coerce')
    
    # Sort by date (just in case)
    stock_data = stock_data.sort_values('Date')
    
    # Handle missing values - forward fill for missing data
    stock_data = stock_data.fillna(method='ffill')
    
    # Add new features (e.g., moving averages)
    stock_data['MA5'] = stock_data['Close'].rolling(window=5).mean()  # 5-day moving average
    stock_data['MA20'] = stock_data['Close'].rolling(window=20).mean()  # 20-day moving average
    
    # Calculate the daily price change and percentage change
    stock_data['Price Change'] = stock_data['Close'].diff()
    stock_data['Pct Change'] = stock_data['Close'].pct_change() * 100
    
    # Fill NaN values in new features to avoid dropping rows
    stock_data['MA5'] = stock_data['MA5'].fillna(stock_data['Close'])  # Fill with close prices
    stock_data['MA20'] = stock_data['MA20'].fillna(stock_data['Close'])
    stock_data['Price Change'] = stock_data['Price Change'].fillna(0)  # Fill with 0 for no change
    stock_data['Pct Change'] = stock_data['Pct Change'].fillna(0)  # Fill with 0% for no change
    
    return stock_data

# Preprocess both TSLA and AAPL data
tsla_data = preprocess_stock_data(tsla_data)
aapl_data = preprocess_stock_data(aapl_data)

# Check the preprocessed data
print("TSLA Data after preprocessing:\n", tsla_data.head())
print("AAPL Data after preprocessing:\n", aapl_data.head())

# Save the cleaned data to new CSV files
tsla_data.to_csv('cleaned_TSLA_stock_data.csv', index=False)
aapl_data.to_csv('cleaned_AAPL_stock_data.csv', index=False)