kaganseyda committed on
Commit
57c6132
·
verified ·
1 Parent(s): 7b086fe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +183 -76
app.py CHANGED
@@ -1,3 +1,11 @@
 
 
 
 
 
 
 
 
1
  import yfinance as yf
2
  import pandas as pd
3
  import numpy as np
@@ -6,130 +14,221 @@ import talib
6
  import gradio as gr
7
  from datetime import date
8
  import os
 
9
 
10
- # TALib'teki tüm formasyon fonksiyonlarını dinamik olarak alalım
11
  TALIB_PATTERNS = sorted([name for name in dir(talib) if name.startswith("CDL")])
12
 
13
- def clean_ohlc(df):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  """
15
- OHLC verisini TA-Lib için temizle:
16
- - Sütun isimlerini normalize eder (büyük-küçük farkı önemsiz)
17
- - Open/High/Low/Close sütunlarını numeric'e çevirir
18
- - NaN içeren satırları atar
19
- - Index'i DatetimeIndex'e çevirir
20
  """
21
- # Tüm sütun adlarını lowercase yap
22
- df.columns = [c.lower() for c in df.columns]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
- # bazen 'adj close' falan da gelir, onları göz ardı ediyoruz
25
- required = ['open', 'high', 'low', 'close']
26
- for col in required:
27
- if col not in df.columns:
28
- raise ValueError(f"Veri setinde '{col}' sütunu yok. Mevcut: {list(df.columns)}")
29
 
30
- # sadece gerekli sütunları al
31
- df = df[required].copy()
 
 
 
 
 
 
 
32
 
33
- # numeric dönüşüm
34
- df = df.apply(pd.to_numeric, errors='coerce')
35
 
36
- # eksikleri at
37
- df = df.dropna(subset=required)
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
- # index datetime değilse düzelt
40
- if not isinstance(df.index, pd.DatetimeIndex):
41
- df.index = pd.to_datetime(df.index, errors='coerce')
42
- df = df.dropna(subset=['open', 'high', 'low', 'close'])
 
43
 
44
- if len(df) == 0:
45
- raise ValueError("Veri temizlendikten sonra boş kaldı.")
46
- return df
 
 
 
 
 
 
 
47
 
 
 
 
48
 
49
- def find_candlestick_patterns(df, pattern_name):
 
 
 
 
 
 
 
 
 
 
50
  """
51
- Verilen formasyon adını kullanarak grafikte işaretleme yapan fonksiyon.
52
- Döndürür: (apds_list, None) veya ([], error_message)
53
  """
 
54
  try:
55
  pattern_func = getattr(talib, pattern_name)
56
  except AttributeError:
57
  return [], f"Error: '{pattern_name}' formasyonu TALib'te bulunamadı."
 
 
58
 
 
59
  try:
60
- # Clean OHLC just in case
61
  df_clean = clean_ohlc(df)
62
  except Exception as e:
63
  return [], f"Veri temizleme hatası: {e}"
64
 
65
  try:
66
- # TA-Lib kesinlikle 1D numpy array istiyor
67
- open_arr = np.asarray(df_clean['Open'].astype(float)).ravel()
68
- high_arr = np.asarray(df_clean['High'].astype(float)).ravel()
69
- low_arr = np.asarray(df_clean['Low'].astype(float)).ravel()
70
- close_arr= np.asarray(df_clean['Close'].astype(float)).ravel()
71
 
72
- # Tüm array'lerin aynı uzunlukta olduğundan emin ol
73
  n = len(open_arr)
74
  if not (len(high_arr) == len(low_arr) == len(close_arr) == n):
75
  return [], "Error: OHLC array uzunlukları eşit değil."
 
 
76
 
77
- # pattern fonksiyonunu çağır
 
78
  pattern_result = pattern_func(open_arr, high_arr, low_arr, close_arr)
79
-
80
- # TA-Lib bazen farklı dtype dönebilir; pandas serisi yap
81
- # index'i df_clean.index kullanıyoruz (satır atılmış olabilir)
82
- pattern_result = pd.Series(pattern_result, index=df_clean.index)
83
  except Exception as e:
84
  return [], f"TALib çalıştırma hatası: {e}"
85
 
86
  apds = []
87
 
88
- # Boğa formasyonları (pozitif değerler)
89
- bullish = pattern_result[pattern_result > 0]
90
- if not bullish.empty:
91
- bullish_points = pd.Series(np.nan, index=df.index) # orijinal df index ile hizala (grafikte boşluk kalırsa sorun olmaz)
92
- for idx in bullish.index:
93
- # Eğer idx orijinal df'de yoksa (nadiren) guard koy
94
- if idx in df.index:
95
- bullish_points.loc[idx] = df.loc[idx, 'Low'] * 0.98
96
  apds.append(
97
  mpf.make_addplot(
98
  bullish_points,
99
  type='scatter',
100
  marker='^',
101
- markersize=80,
102
  color='green',
103
  panel=0,
104
- alpha=0.8
105
  )
106
  )
107
 
108
- # Ayı formasyonları (negatif değerler)
109
- bearish = pattern_result[pattern_result < 0]
110
- if not bearish.empty:
111
- bearish_points = pd.Series(np.nan, index=df.index)
112
- for idx in bearish.index:
113
- if idx in df.index:
114
- bearish_points.loc[idx] = df.loc[idx, 'High'] * 1.02
115
  apds.append(
116
  mpf.make_addplot(
117
  bearish_points,
118
  type='scatter',
119
  marker='v',
120
- markersize=80,
121
  color='red',
122
  panel=0,
123
- alpha=0.8
124
  )
125
  )
126
 
127
  return apds, None
128
 
129
- def plot_stock_with_patterns(ticker_symbol, start_date, end_date, selected_patterns):
130
  """
131
- Hisse grafiğini seçilen formasyonlarla birlikte çizdirir.
132
- Döndürür: (image_filepath veya None, status_message)
133
  """
134
  if not ticker_symbol:
135
  return None, "Error: Hisse sembolü boş olamaz."
@@ -139,48 +238,56 @@ def plot_stock_with_patterns(ticker_symbol, start_date, end_date, selected_patte
139
  end = pd.to_datetime(end_date)
140
  if start >= end:
141
  return None, "Error: Başlangıç tarihi bitişten önce olmalı."
142
- except (ValueError, TypeError):
143
  return None, "Error: Tarih formatı geçersiz. YYYY-MM-DD şeklinde girin."
144
 
 
145
  try:
146
  df = yf.download(ticker_symbol, start=start_date, end=end_date, progress=False)
147
- if df.empty:
148
- return None, f"Error: '{ticker_symbol}' için veri bulunamadı."
149
  except Exception as e:
150
  return None, f"Veri indirilirken hata oluştu: {e}"
151
 
152
- # Temizleme: OHLC numeric ve NaN'ları at
153
  try:
154
- df = clean_ohlc(df)
155
  except Exception as e:
156
  return None, f"Veri temizleme hatası: {e}"
157
 
 
158
  all_apds = []
159
  if selected_patterns:
 
 
 
160
  for pattern_name in selected_patterns:
161
- pattern_apds, err = find_candlestick_patterns(df, pattern_name)
162
  if err:
163
  return None, err
164
  all_apds.extend(pattern_apds)
165
 
166
- # geçici klasör
167
  os.makedirs('/tmp', exist_ok=True)
168
  safe_ticker = str(ticker_symbol).replace("/", "_").replace("\\", "_")
169
  fig_path = f"/tmp/stock_chart_{safe_ticker}_{pd.Timestamp.now().strftime('%Y%m%d%H%M%S')}.png"
170
 
 
171
  s = mpf.make_mpf_style(base_mpf_style='yahoo', mavcolors=['#1f77b4', '#ff7f0e', '#2ca02c'])
172
 
 
 
 
173
  try:
174
- # mpf.plot index'in DatetimeIndex olmasını bekler (biz onu sağladık)
175
  fig, _ = mpf.plot(
176
- df,
177
  type='candle',
178
- volume=True,
179
  addplot=all_apds if all_apds else None,
180
  style=s,
181
  title=f"\n{ticker_symbol} Candlestick Chart",
182
- ylabel='Price ($)',
183
- ylabel_lower='Volume',
184
  returnfig=True,
185
  figscale=1.5
186
  )
 
1
+ """
2
+ Robust TALib + mplfinance + Gradio example
3
+ - Normalizes column names (handles tuple/MultiIndex columns)
4
+ - Keeps Volume if present
5
+ - Converts everything numeric and drops NaN OHLC rows
6
+ - Calls TALib pattern functions safely and plots with mplfinance
7
+ """
8
+
9
  import yfinance as yf
10
  import pandas as pd
11
  import numpy as np
 
14
  import gradio as gr
15
  from datetime import date
16
  import os
17
+ from typing import Optional, Tuple, List
18
 
19
+ # Pattern list from talib (CDL* functions)
20
  TALIB_PATTERNS = sorted([name for name in dir(talib) if name.startswith("CDL")])
21
 
22
+ def _normalize_col_name(col) -> str:
23
+ """Turn any column key into a simple lowercase string.
24
+ Handles tuples (MultiIndex) by joining parts with '_'."""
25
+ if isinstance(col, (tuple, list)):
26
+ # join non-empty parts, convert to string
27
+ parts = [str(c) for c in col if c is not None]
28
+ joined = "_".join(parts)
29
+ return joined.strip().lower()
30
+ return str(col).strip().lower()
31
+
32
+ def _find_best_col(key: str, columns: List[str]) -> Optional[str]:
33
+ """Given normalized columns, find best matching column for key (open/high/low/close/volume)."""
34
+ key = key.lower()
35
+ # Exact match
36
+ if key in columns:
37
+ return key
38
+ # Prefer suffix matches (e.g., msft_open)
39
+ for c in columns:
40
+ if c.endswith("_" + key):
41
+ return c
42
+ # Fallback: any column containing the key
43
+ for c in columns:
44
+ if key in c:
45
+ return c
46
+ return None
47
+
48
def clean_ohlc(df: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned OHLC(V) copy of *df* suitable for TA-Lib and mplfinance.

    Steps:
      1. Flatten/normalize column names (tuple keys from MultiIndex columns
         are handled by ``_normalize_col_name``).
      2. Locate the open/high/low/close columns (volume is optional) and
         rename them to ``'Open'``, ``'High'``, ``'Low'``, ``'Close'`` and
         ``'Volume'``.
      3. Make sure the index is a ``DatetimeIndex``; when it is not, a
         ``date``/``datetime`` column of the input is used if present,
         otherwise the existing index is parsed.
      4. Coerce the values to numeric, drop rows with a NaT index or a NaN in
         any OHLC column, and sort ascending by date.

    The caller's DataFrame is never mutated.

    Args:
        df: Raw price data, e.g. as returned by ``yfinance.download``.

    Returns:
        A new DataFrame with columns ``Open, High, Low, Close`` and, when
        available, ``Volume``.

    Raises:
        ValueError: If *df* is not a DataFrame, a required column cannot be
            identified, the index cannot be converted to datetimes, or no
            rows survive the cleaning.
    """
    if df is None:
        raise ValueError("Gelen dataframe None.")
    if not isinstance(df, pd.DataFrame):
        raise ValueError(f"Gelen obje DataFrame değil: {type(df)}")

    # Work on a copy so the caller's frame keeps its original column labels.
    df_work = df.copy()
    normalized_columns = [_normalize_col_name(c) for c in df_work.columns]
    df_work.columns = normalized_columns

    # Map each located source column to its canonical name (insertion order
    # fixes the output column order: Open, High, Low, Close[, Volume]).
    canonical = {'open': 'Open', 'high': 'High', 'low': 'Low', 'close': 'Close'}
    rename_map = {}
    for key, canonical_name in canonical.items():
        matched = _find_best_col(key, normalized_columns)
        if matched is None:
            raise ValueError(f"Veride '{key}' sütunu bulunamadı. Mevcut sütunlar: {list(normalized_columns)}")
        rename_map[matched] = canonical_name
    if len(rename_map) < len(canonical):
        # Two different keys resolved to the same source column.
        raise ValueError(f"OHLC sütunları birbirinden ayırt edilemedi. Mevcut sütunlar: {list(normalized_columns)}")

    vol_key = _find_best_col('volume', normalized_columns)
    if vol_key is not None and vol_key not in rename_map:
        rename_map[vol_key] = 'Volume'

    df_sel = df_work[list(rename_map)].rename(columns=rename_map)

    if not isinstance(df_sel.index, pd.DatetimeIndex):
        # The date column must be looked up on df_work: df_sel only holds
        # the OHLC(V) columns at this point.
        date_col = next((c for c in ('date', 'datetime') if c in df_work.columns), None)
        date_source = df_work[date_col] if date_col is not None else df_sel.index
        try:
            df_sel.index = pd.DatetimeIndex(pd.to_datetime(date_source, errors='coerce'))
        except (TypeError, ValueError, OverflowError) as exc:
            raise ValueError(f"Index DatetimeIndex'e çevrilemedi: {exc}") from exc

    df_sel = df_sel.apply(pd.to_numeric, errors='coerce')

    # Unparseable dates became NaT above; they cannot be plotted.
    df_sel = df_sel[~df_sel.index.isna()]
    df_sel = df_sel.dropna(subset=['Open', 'High', 'Low', 'Close']).copy()

    if df_sel.empty:
        raise ValueError("Veri temizlendikten sonra boş kaldı (OHLC yok).")

    return df_sel.sort_index()
150
+
151
def find_candlestick_patterns(df: pd.DataFrame, pattern_name: str) -> Tuple[List, Optional[str]]:
    """Run one TA-Lib candlestick pattern and build mplfinance marker plots.

    Bars where the pattern output is positive get a green ``^`` marker placed
    2% below the bar's low; bars where it is negative get a red ``v`` marker
    placed 2% above the bar's high.

    Args:
        df: Price data; it is passed through ``clean_ohlc`` before use.
        pattern_name: Name of a TA-Lib attribute, e.g. ``'CDLDOJI'``.

    Returns:
        ``(apds, None)`` on success, where ``apds`` is a (possibly empty)
        list of ``mpf.make_addplot`` objects aligned to the cleaned data, or
        ``([], error_message)`` when any step fails.
    """
    try:
        pattern_func = getattr(talib, pattern_name)
    except (AttributeError, TypeError):
        # TypeError: pattern_name was not a string at all.
        return [], f"Error: '{pattern_name}' formasyonu TALib'te bulunamadı."
    if not callable(pattern_func):
        return [], f"Error: '{pattern_name}' TALib içinde callable değil."

    try:
        df_clean = clean_ohlc(df)
    except Exception as e:
        return [], f"Veri temizleme hatası: {e}"

    try:
        # TA-Lib is fed flat float arrays, one per price column.
        open_arr, high_arr, low_arr, close_arr = (
            np.asarray(df_clean[col].values, dtype=float).ravel()
            for col in ('Open', 'High', 'Low', 'Close')
        )
        n = len(open_arr)
        if not (len(high_arr) == len(low_arr) == len(close_arr) == n):
            return [], "Error: OHLC array uzunlukları eşit değil."
    except Exception as e:
        return [], f"Array dönüşüm hatası: {e}"

    try:
        pattern_result = pattern_func(open_arr, high_arr, low_arr, close_arr)
        # Building the Series also validates that the output length matches.
        pattern_series = pd.Series(pattern_result, index=df_clean.index)
    except Exception as e:
        return [], f"TALib çalıştırma hatası: {e}"

    # Positional boolean masks (rather than label-based .loc assignment) keep
    # this working even when the index contains duplicate timestamps.
    signal = pattern_series.to_numpy()
    marker_specs = (
        (signal > 0, 'Low', 0.98, '^', 'green'),   # bullish
        (signal < 0, 'High', 1.02, 'v', 'red'),    # bearish
    )

    apds = []
    for mask, price_col, offset, marker, color in marker_specs:
        if not mask.any():
            continue
        # NaN everywhere except on the bars where the pattern fired.
        points = (df_clean[price_col] * offset).where(mask)
        apds.append(
            mpf.make_addplot(
                points,
                type='scatter',
                marker=marker,
                markersize=60,
                color=color,
                panel=0,
                alpha=0.85
            )
        )

    return apds, None
228
 
229
+ def plot_stock_with_patterns(ticker_symbol: str, start_date: str, end_date: str, selected_patterns) -> Tuple[Optional[str], str]:
230
  """
231
+ Main handler for Gradio. Returns (image_filepath or None, status_message).
 
232
  """
233
  if not ticker_symbol:
234
  return None, "Error: Hisse sembolü boş olamaz."
 
238
  end = pd.to_datetime(end_date)
239
  if start >= end:
240
  return None, "Error: Başlangıç tarihi bitişten önce olmalı."
241
+ except Exception:
242
  return None, "Error: Tarih formatı geçersiz. YYYY-MM-DD şeklinde girin."
243
 
244
+ # download
245
  try:
246
  df = yf.download(ticker_symbol, start=start_date, end=end_date, progress=False)
247
+ if df is None or (isinstance(df, pd.DataFrame) and df.empty):
248
+ return None, f"Error: '{ticker_symbol}' için veri bulunamadı (yfinance boş döndü)."
249
  except Exception as e:
250
  return None, f"Veri indirilirken hata oluştu: {e}"
251
 
252
+ # Clean once here to feed to mplfinance and TALib
253
  try:
254
+ df_clean = clean_ohlc(df)
255
  except Exception as e:
256
  return None, f"Veri temizleme hatası: {e}"
257
 
258
+ # patterns
259
  all_apds = []
260
  if selected_patterns:
261
+ # selected_patterns might be tuple/list/str
262
+ if isinstance(selected_patterns, str):
263
+ selected_patterns = [selected_patterns]
264
  for pattern_name in selected_patterns:
265
+ pattern_apds, err = find_candlestick_patterns(df_clean, pattern_name)
266
  if err:
267
  return None, err
268
  all_apds.extend(pattern_apds)
269
 
270
+ # prepare fig path
271
  os.makedirs('/tmp', exist_ok=True)
272
  safe_ticker = str(ticker_symbol).replace("/", "_").replace("\\", "_")
273
  fig_path = f"/tmp/stock_chart_{safe_ticker}_{pd.Timestamp.now().strftime('%Y%m%d%H%M%S')}.png"
274
 
275
+ # style
276
  s = mpf.make_mpf_style(base_mpf_style='yahoo', mavcolors=['#1f77b4', '#ff7f0e', '#2ca02c'])
277
 
278
+ # detect if volume exists
279
+ has_volume = 'Volume' in df_clean.columns
280
+
281
  try:
 
282
  fig, _ = mpf.plot(
283
+ df_clean,
284
  type='candle',
285
+ volume=has_volume,
286
  addplot=all_apds if all_apds else None,
287
  style=s,
288
  title=f"\n{ticker_symbol} Candlestick Chart",
289
+ ylabel='Price',
290
+ ylabel_lower='Volume' if has_volume else None,
291
  returnfig=True,
292
  figscale=1.5
293
  )