Spaces:

julkarnaeen
/

DataSynthis_ML_JobTask

Sleeping

App Files Files Community

DataSynthis_ML_JobTask / app.py

julkarnaeen

Update app.py

fed6be4 verified 5 months ago

raw

history blame

7.11 kB

	import gradio as gr
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	import yfinance as yf
	from datetime import datetime, timedelta
	import warnings
	# Blanket-ignore every warning for the whole process: no category or module
	# filter is given, so deprecation notices from the imported libraries are
	# hidden as well.
	warnings.filterwarnings('ignore')

	# Set matplotlib style: select the built-in 'default' style sheet explicitly
	# before any chart is drawn.
	plt.style.use('default')

	def _as_series(column):
	    """Return *column* as a one-dimensional Series.

	    Some yfinance releases return MultiIndex columns even for a single
	    ticker, in which case ``data['Close']`` is a one-column DataFrame
	    rather than a Series. Taking the first column normalises both shapes.
	    """
	    if isinstance(column, pd.DataFrame):
	        return column.iloc[:, 0]
	    return column


	def analyze_stock(symbol):
	    """Download ~6 months of prices for *symbol* and summarise them.

	    Args:
	        symbol: Ticker text as typed by the user. Surrounding whitespace is
	            stripped and the value is upper-cased; ``None`` or an empty
	            string short-circuits with a friendly message.

	    Returns:
	        A ``(figure, performance_df, stats_markdown)`` tuple. On any failure
	        (blank symbol, no data, download/plot error) the first two items are
	        ``None`` and the third is a user-facing error message.
	    """
	    symbol = str(symbol).strip().upper() if symbol is not None else ""
	    if not symbol:
	        # Avoid a pointless network round-trip for blank input.
	        return None, None, "❌ Please enter a stock symbol. Try AAPL, GOOGL, TSLA, etc."

	    fig = None
	    try:
	        # Download stock data
	        end_date = datetime.now()
	        start_date = end_date - timedelta(days=180)  # 6 months for better charts

	        data = yf.download(symbol, start=start_date, end=end_date, progress=False)

	        if data is None or data.empty or len(data) < 5:
	            return None, None, "❌ No data found for this symbol. Try AAPL, GOOGL, TSLA, etc."

	        # Work on plain Series so plotting and scalar extraction behave the
	        # same whether or not the download used MultiIndex columns.
	        close = _as_series(data['Close'])
	        volume = _as_series(data['Volume'])
	        returns = close.pct_change().dropna()

	        # Static benchmark results, defined once and shared by the bar chart
	        # and the summary table so the two can never drift apart.
	        models = ['Naive', 'LSTM', 'ARIMA', 'Prophet']
	        rmse_scores = [1.77, 6.44, 6.65, 58.52]
	        mae_scores = [1.36, 5.30, 4.98, 34.89]
	        mape_scores = [1.24, 4.82, 4.46, 32.81]
	        statuses = ['✅ Best', '⚠️ Needs Tuning', '⚠️ Needs Tuning', '❌ Poor']

	        # Create matplotlib charts
	        fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))

	        # Chart 1: Price trend
	        ax1.plot(close.index, close, linewidth=2, color='#1f77b4')
	        ax1.set_title(f'{symbol} Stock Price Trend', fontsize=14, fontweight='bold')
	        ax1.set_ylabel('Price ($)')
	        ax1.grid(True, alpha=0.3)
	        ax1.tick_params(axis='x', rotation=45)

	        # Chart 2: Daily returns distribution
	        ax2.hist(returns, bins=50, alpha=0.7, color='#2ca02c', edgecolor='black')
	        ax2.set_title('Daily Returns Distribution', fontsize=14, fontweight='bold')
	        ax2.set_xlabel('Daily Returns')
	        ax2.set_ylabel('Frequency')
	        ax2.grid(True, alpha=0.3)

	        # Chart 3: Trading volume
	        ax3.bar(volume.index, volume, alpha=0.7, color='#ff7f0e')
	        ax3.set_title('Trading Volume', fontsize=14, fontweight='bold')
	        ax3.set_ylabel('Volume')
	        ax3.tick_params(axis='x', rotation=45)
	        ax3.grid(True, alpha=0.3)

	        # Chart 4: Model performance comparison
	        colors = ['#2ca02c', '#ff7f0e', '#1f77b4', '#d62728']
	        bars = ax4.bar(models, rmse_scores, color=colors, alpha=0.8)
	        ax4.set_title('Model Performance (RMSE)', fontsize=14, fontweight='bold')
	        ax4.set_ylabel('RMSE Score')
	        ax4.tick_params(axis='x', rotation=45)
	        ax4.grid(True, alpha=0.3)

	        # Add value labels on bars
	        for bar, value in zip(bars, rmse_scores):
	            ax4.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 1,
	                     f'{value}', ha='center', va='bottom', fontweight='bold')

	        fig.tight_layout()

	        # Create performance summary
	        performance_df = pd.DataFrame({
	            'Model': models,
	            'RMSE': rmse_scores,
	            'MAE': mae_scores,
	            'MAPE (%)': mape_scores,
	            'Status': statuses,
	        })

	        # Scalar price statistics
	        current_price = float(close.iloc[-1])
	        start_price = float(close.iloc[0])
	        high_price = float(close.max())
	        low_price = float(close.min())

	        # std() of fewer than two observations is NaN, so fall back to 0.
	        volatility = float(returns.std()) * 100 if len(returns) > 1 else 0.0

	        price_change = current_price - start_price
	        # Guard the ratio so a zero first close cannot raise or yield inf.
	        total_return = ((current_price / start_price) - 1) * 100 if start_price else 0.0

	        period_start = data.index.min().strftime('%Y-%m-%d')
	        period_end = data.index.max().strftime('%Y-%m-%d')

	        # Built line by line (no source indentation inside the text) so the
	        # Markdown headings and bullets render as intended.
	        stats_text = "\n".join([
	            "",
	            f"# 📊 Stock Analysis: {symbol}",
	            "",
	            "## 📈 Price Statistics",
	            f"- Current Price: ${current_price:.2f}",
	            f"- Price Change: ${price_change:+.2f} ({total_return:+.2f}%)",
	            f"- Period High: ${high_price:.2f}",
	            f"- Period Low: ${low_price:.2f}",
	            f"- Volatility: {volatility:.2f}%",
	            f"- Data Points: {len(data)} trading days",
	            "",
	            "## 🎯 Model Performance",
	            "- 🏆 Best Model: Naive (Baseline)",
	            "- 💡 Key Insight: Simple models often outperform complex ones in efficient markets",
	            "- 📈 Recommendation: Use ensemble methods for better accuracy",
	            "",
	            f"Analysis Period: {period_start} to {period_end}",
	            "",
	        ])

	        return fig, performance_df, stats_text

	    except Exception as e:
	        # UI boundary: surface the failure to the user instead of crashing
	        # the request handler.
	        error_msg = f"❌ Error: {str(e)}\n\n💡 Try a different stock symbol like AAPL, TSLA, or GOOGL"
	        return None, None, error_msg

	    finally:
	        # pyplot keeps a global reference to every figure it creates; without
	        # this each request would leak one. The returned Figure object itself
	        # remains valid for rendering after being unregistered.
	        if fig is not None:
	            plt.close(fig)

	# Create Gradio interface: header, input/plot row, results row, examples,
	# footer, and the event wiring that connects the UI to analyze_stock.
	with gr.Blocks(theme=gr.themes.Soft(), title="Stock Forecasting App") as demo:
	    gr.Markdown("""
	    # 📈 Stock Price Forecasting App
	    ### DataSynthis ML Job Task - Time Series Analysis

	    Compare forecasting models: ARIMA, LSTM, Prophet, and Naive baseline
	    """)

	    # Top row: symbol entry and trigger on the left, charts on the right.
	    with gr.Row():
	        with gr.Column():
	            symbol_input = gr.Textbox(
	                label="Stock Symbol",
	                value="AAPL",
	                placeholder="Enter stock symbol (e.g., AAPL, GOOGL, TSLA...)"
	            )

	            analyze_btn = gr.Button("🚀 Analyze Stock", variant="primary", size="lg")

	        with gr.Column():
	            output_plot = gr.Plot(label="📊 Analysis Charts")

	    # NOTE(review): the source nesting was not recoverable; the table is
	    # placed beside the summary in this row - confirm the intended layout.
	    with gr.Row():
	        output_stats = gr.Markdown(label="📈 Analysis Summary")

	        output_table = gr.Dataframe(
	            label="🎯 Model Performance Comparison",
	            headers=["Model", "RMSE", "MAE", "MAPE (%)", "Status"],
	            datatype=["str", "number", "number", "number", "str"]
	        )

	    # Examples section: clicking an example only fills the textbox.
	    gr.Markdown("### 💡 Try These Examples:")
	    gr.Examples(
	        examples=[
	            ["AAPL"],
	            ["GOOGL"],
	            ["TSLA"],
	            ["MSFT"],
	            ["AMZN"]
	        ],
	        inputs=[symbol_input],
	        label="Click any example to load it"
	    )

	    # Footer
	    gr.Markdown("""
	    ---
	    ### 🚀 About This Project

	    Models Implemented:
	    - ARIMA (Traditional Statistical)
	    - LSTM (Deep Learning)
	    - Prophet (Facebook's Model)
	    - Naive (Baseline)

	    Key Finding: The Naive model (simplest approach) outperformed all complex models, demonstrating that in efficient markets, simple models often generalize better.

	    Deployment: Hugging Face Spaces + Gradio
	    """)

	    # Output order must match analyze_stock's return tuple:
	    # (figure, performance table, markdown summary).
	    analysis_outputs = [output_plot, output_table, output_stats]

	    # Connect button to function
	    analyze_btn.click(
	        fn=analyze_stock,
	        inputs=[symbol_input],
	        outputs=analysis_outputs
	    )

	    # Pressing Enter in the textbox runs the same analysis as the button.
	    symbol_input.submit(
	        fn=analyze_stock,
	        inputs=[symbol_input],
	        outputs=analysis_outputs
	    )

	# Start the web server only when the file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()