pvyas96 committed on
Commit
4a31363
·
verified ·
1 Parent(s): 86f7ccd

Update src/utils.py

Browse files
Files changed (1) hide show
  1. src/utils.py +37 -293
src/utils.py CHANGED
@@ -1,296 +1,40 @@
1
  import yfinance as yf
2
  import pandas as pd
3
- # Using curl_cffi for session robustness to bypass potential anti-bot measures
4
- from curl_cffi import requests
5
 
6
- class StockAnalyzer:
7
- def __init__(self, ticker_symbol):
8
- # --- FIX 1: Auto-correct Ticker for Indian Market ---
9
- ticker_symbol = ticker_symbol.upper().strip()
10
- if not (ticker_symbol.endswith('.NS') or ticker_symbol.endswith('.BO')) and ticker_symbol.isalpha():
11
- self.ticker_symbol = f"{ticker_symbol}.NS"
12
- else:
13
- self.ticker_symbol = ticker_symbol
14
-
15
- # --- ROBUST SESSION ---
16
- self.session = requests.Session(impersonate="chrome")
17
- self.ticker = yf.Ticker(self.ticker_symbol, session=self.session)
18
-
19
- # --- FIX 2: Safe Fetching of ALL financial data (Annual Data) ---
20
- self.info = self.ticker.info or {} # Info is already wrapped in original code
21
-
22
- # Wrap statement fetching in try/except blocks to ensure object initialization doesn't fail
23
- try:
24
- self.income_stmt = self.ticker.income_stmt
25
- except Exception:
26
- self.income_stmt = pd.DataFrame()
27
-
28
- try:
29
- self.balance_sheet = self.ticker.balance_sheet
30
- except Exception:
31
- self.balance_sheet = pd.DataFrame()
32
-
33
- try:
34
- self.cashflow = self.ticker.cashflow
35
- except Exception:
36
- self.cashflow = pd.DataFrame()
37
-
38
- def _get_data(self, df, possible_keys):
39
- """
40
- Smart Search: Tries to find data using a list of possible key names.
41
- Returns None if not found.
42
- """
43
- if df is None or df.empty:
44
- return None
45
-
46
- # Create a normalized map for case-insensitive lookup, stripping whitespace
47
- index_map = {str(idx).strip().lower(): idx for idx in df.index}
48
-
49
- for key in possible_keys:
50
- # 1. Exact Match (case-sensitive)
51
- if key in df.index:
52
- return df.loc[key]
53
-
54
- # 2. Case-Insensitive Match
55
- normalized_key = key.strip().lower()
56
- if normalized_key in index_map:
57
- real_key = index_map[normalized_key]
58
- return df.loc[real_key]
59
-
60
- return None
61
-
62
- def get_financial_trends(self):
63
- try:
64
- # --- 1. INCOME STATEMENT (Includes Gross Profit for Piotroski Score) ---
65
- rev = self._get_data(self.income_stmt, [
66
- 'Total Revenue', 'Revenue', 'Operating Revenue', 'TotalOperatingRevenue'
67
- ])
68
- ni = self._get_data(self.income_stmt, [
69
- 'Net Income', 'Net Income Common Stockholders', 'NetIncomeContinuousOperations', 'NetIncome'
70
- ])
71
- gp = self._get_data(self.income_stmt, [
72
- 'Gross Profit', 'GrossProfit'
73
- ]) # NEW: Needed for Gross Margin Change
74
-
75
- # --- 2. BALANCE SHEET (Includes Current Assets/Liabs, Debt, and Shares for Piotroski Score) ---
76
- assets = self._get_data(self.balance_sheet, [
77
- 'Total Assets', 'Assets', 'TotalAssets'
78
- ])
79
- liabs = self._get_data(self.balance_sheet, [
80
- 'Total Liabilities Net Minority Interest', 'Total Liabilities', 'TotalLiabilities'
81
- ])
82
- equity = self._get_data(self.balance_sheet, [
83
- 'Stockholders Equity', 'Total Equity Gross Minority Interest', 'TotalEquityGrossMinorityInterest'
84
- ])
85
- current_assets = self._get_data(self.balance_sheet, [
86
- 'Current Assets', 'Total Current Assets', 'CurrentAssets'
87
- ]) # NEW: Needed for Current Ratio
88
- current_liabilities = self._get_data(self.balance_sheet, [
89
- 'Current Liabilities', 'Total Current Liabilities', 'CurrentLiabilities'
90
- ]) # NEW: Needed for Current Ratio
91
- total_debt = self._get_data(self.balance_sheet, [
92
- 'Total Debt', 'Long Term Debt', 'TotalDebt', 'LongTermDebt'
93
- ]) # Using total debt for comprehensive leverage check
94
- shares = self._get_data(self.balance_sheet, [
95
- 'Share Issued', 'Common Stock', 'CommonStock'
96
- ]) # Proxy for shares outstanding/issuance
97
-
98
- # --- 3. CASH FLOW ---
99
- ocf = self._get_data(self.cashflow, [
100
- 'Operating Cash Flow', 'Total Cash From Operating Activities', 'OperatingCashFlow'
101
- ])
102
-
103
- # Smart FCF Calculation: If 'Free Cash Flow' is missing, calculate it (OCF + CapEx)
104
- fcf = self._get_data(self.cashflow, ['Free Cash Flow', 'FreeCashFlow'])
105
- if fcf is None and ocf is not None:
106
- capex = self._get_data(self.cashflow, ['Capital Expenditure', 'Capital Expenditures', 'CapitalExpenditure'])
107
- if capex is not None:
108
- fcf = ocf + capex
109
-
110
- # Combine into DataFrame
111
- data = {}
112
- if rev is not None: data['Revenue'] = rev
113
- if ni is not None: data['Net Income'] = ni
114
- if gp is not None: data['Gross Profit'] = gp # ADDED
115
- if assets is not None: data['Total Assets'] = assets
116
- if liabs is not None: data['Total Liabilities'] = liabs
117
- if equity is not None: data['Equity'] = equity
118
- if current_assets is not None: data['Current Assets'] = current_assets # ADDED
119
- if current_liabilities is not None: data['Current Liabilities'] = current_liabilities # ADDED
120
- if total_debt is not None: data['Total Debt'] = total_debt # ADDED
121
- if shares is not None: data['Shares Issued'] = shares # ADDED
122
- if ocf is not None: data['Operating Cash Flow'] = ocf
123
- if fcf is not None: data['Free Cash Flow'] = fcf
124
-
125
- if not data:
126
- return pd.DataFrame()
127
-
128
- df = pd.DataFrame(data)
129
-
130
- # Ensure Index is DateTime and Sorted
131
- df.index = pd.to_datetime(df.index)
132
- df = df.sort_index()
133
-
134
- return df
135
-
136
- except Exception as e:
137
- # print(f"Error in get_financial_trends: {e}") # Suppress console output in final app
138
- return pd.DataFrame()
139
-
140
- def get_summary_metrics(self):
141
- # Unchanged from original
142
- i = self.info
143
- return {
144
- "current_price": i.get("currentPrice") or i.get("regularMarketPrice"),
145
- "market_cap": i.get("marketCap"),
146
- "pe_ratio": i.get("trailingPE"),
147
- "dividend_yield": i.get("dividendYield"),
148
- "currency": i.get("currency", "USD"),
149
- "summary": i.get("longBusinessSummary", "No summary available."),
150
- "sector": i.get("sector", "Unknown"),
151
- "industry": i.get("industry", "Unknown"),
152
- "website": i.get("website", "#")
153
- }
154
-
155
- def check_red_flags(self):
156
- flags = []
157
- try:
158
- df = self.get_financial_trends()
159
- if df.empty:
160
- return [{"type": "warning", "msg": "Insufficient data for Red Flag analysis"}]
161
-
162
- latest = df.iloc[-1]
163
-
164
- # 1. Debt Check (Using Total Debt from trends)
165
- debt = latest.get('Total Debt', 0)
166
- equity_val = latest.get('Equity', 0)
167
-
168
- if equity_val > 0:
169
- de_ratio = debt / equity_val
170
- if de_ratio > 2.0:
171
- flags.append({"type": "danger", "msg": f"High Debt (D/E: {de_ratio:.2f})"})
172
- else:
173
- flags.append({"type": "success", "msg": f"Healthy Debt (D/E: {de_ratio:.2f})"})
174
- else:
175
- flags.append({"type": "warning", "msg": "Cannot calculate D/E (Negative/Zero Equity)"})
176
-
177
-
178
- # 2. Revenue Trend
179
- if len(df) > 1 and 'Revenue' in df:
180
- if df['Revenue'].iloc[-1] < df['Revenue'].iloc[-2]:
181
- flags.append({"type": "danger", "msg": "Declining Revenue (YoY)"})
182
- else:
183
- flags.append({"type": "success", "msg": "Revenue Growing"})
184
-
185
- # 3. Free Cash Flow
186
- fcf_val = latest.get('Free Cash Flow', 0)
187
- if fcf_val < 0:
188
- flags.append({"type": "danger", "msg": "Negative Free Cash Flow"})
189
- else:
190
- flags.append({"type": "success", "msg": "Positive Free Cash Flow"})
191
-
192
- # 4. NEW: Current Ratio Check (Liquidity check)
193
- ca_val = latest.get('Current Assets', 0)
194
- cl_val = latest.get('Current Liabilities', 0)
195
- if cl_val > 0:
196
- current_ratio = ca_val / cl_val
197
- if current_ratio < 1.0:
198
- flags.append({"type": "danger", "msg": f"Poor Liquidity (Current Ratio: {current_ratio:.2f} < 1.0)"})
199
- else:
200
- flags.append({"type": "success", "msg": f"Good Liquidity (Current Ratio: {current_ratio:.2f})"})
201
-
202
-
203
- except Exception as e:
204
- flags.append({"type": "warning", "msg": f"Error calculating flags: {str(e)}"})
205
-
206
- return flags
207
-
208
- def calculate_piotroski_score(self):
209
- score = 0
210
- breakdown = []
211
- try:
212
- df = self.get_financial_trends()
213
-
214
- # We need at least 2 periods for year-over-year comparison (8 of 9 points)
215
- if len(df) < 2:
216
- return 0, ["Insufficient Historical Data (Need 2+ periods)"]
217
-
218
- curr = df.iloc[-1]
219
- prev = df.iloc[-2]
220
-
221
- # --- PROFITABILITY (F1 - F4) ---
222
- # F1. Positive Net Income
223
- if curr.get('Net Income', 0) > 0: score+=1; breakdown.append("✅ F1. Positive Net Income")
224
- else: breakdown.append("❌ F1. Negative Net Income")
225
-
226
- # F2. Positive Operating Cash Flow
227
- if curr.get('Operating Cash Flow', 0) > 0: score+=1; breakdown.append("✅ F2. Positive OCF")
228
- else: breakdown.append("❌ F2. Negative OCF")
229
-
230
- # F3. ROA Increasing (Calculated using Net Income / Total Assets)
231
- if 'Total Assets' in curr and 'Net Income' in curr:
232
- # Use a small epsilon to prevent division by zero for total assets
233
- epsilon = 1
234
- roa_curr = curr.get('Net Income', 0) / curr.get('Total Assets', epsilon)
235
- roa_prev = prev.get('Net Income', 0) / prev.get('Total Assets', epsilon)
236
- if roa_curr > roa_prev: score+=1; breakdown.append("✅ F3. ROA Increasing")
237
- else: breakdown.append("❌ F3. ROA Decreasing")
238
- else: breakdown.append("⚠️ F3. ROA Change cannot be calculated")
239
-
240
- # F4. Quality of Earnings (OCF > Net Income)
241
- if curr.get('Operating Cash Flow', 0) > curr.get('Net Income', 0): score+=1; breakdown.append("✅ F4. Quality Earnings (OCF > Net Income)")
242
- else: breakdown.append("❌ F4. Low Quality Earnings")
243
-
244
- # --- LEVERAGE, LIQUIDITY, SOURCE OF FUNDS (F5 - F7) ---
245
-
246
- # F5. Decreased Leverage (Change in Total Debt / Total Assets)
247
- if 'Total Debt' in curr and 'Total Assets' in curr:
248
- epsilon = 1
249
- leverage_curr = curr.get('Total Debt', 0) / curr.get('Total Assets', epsilon)
250
- leverage_prev = prev.get('Total Debt', 0) / prev.get('Total Assets', epsilon)
251
- if leverage_curr < leverage_prev: score+=1; breakdown.append("✅ F5. Decreased Leverage (Debt/Assets Ratio)")
252
- else: breakdown.append("❌ F5. Increased Leverage")
253
- else: breakdown.append("⚠️ F5. Leverage Change cannot be calculated")
254
-
255
- # F6. Increased Current Ratio (Current Assets / Current Liabilities)
256
- if 'Current Assets' in curr and 'Current Liabilities' in curr:
257
- epsilon = 1
258
- cr_curr = curr.get('Current Assets', 0) / curr.get('Current Liabilities', epsilon)
259
- cr_prev = prev.get('Current Assets', 0) / prev.get('Current Liabilities', epsilon)
260
- if cr_curr > cr_prev: score+=1; breakdown.append("✅ F6. Increased Current Ratio (Liquidity)")
261
- else: breakdown.append("❌ F6. Decreased Current Ratio")
262
- else: breakdown.append("⚠️ F6. Current Ratio Change cannot be calculated")
263
-
264
- # F7. No New Shares Issued (Shares Issued <= Previous Period Shares Issued)
265
- if 'Shares Issued' in curr:
266
- if curr.get('Shares Issued', 0) <= prev.get('Shares Issued', 0): score+=1; breakdown.append("✅ F7. No Share Dilution (Shares <= Prior Period)")
267
- else: breakdown.append("❌ F7. Share Dilution")
268
- else: breakdown.append("⚠️ F7. Share Dilution cannot be assessed")
269
-
270
-
271
- # --- OPERATING EFFICIENCY (F8 - F9) ---
272
-
273
- # F8. Increased Gross Margin (Gross Profit / Revenue)
274
- if 'Gross Profit' in curr and 'Revenue' in curr:
275
- epsilon = 1
276
- gm_curr = curr.get('Gross Profit', 0) / curr.get('Revenue', epsilon)
277
- gm_prev = prev.get('Gross Profit', 0) / prev.get('Revenue', epsilon)
278
- if gm_curr > gm_prev: score+=1; breakdown.append("✅ F8. Increased Gross Margin")
279
- else: breakdown.append("❌ F8. Decreased Gross Margin")
280
- else: breakdown.append("⚠️ F8. Gross Margin Change cannot be calculated")
281
-
282
- # F9. Increased Asset Turnover (Revenue / Total Assets)
283
- if 'Revenue' in curr and 'Total Assets' in curr:
284
- epsilon = 1
285
- at_curr = curr.get('Revenue', 0) / curr.get('Total Assets', epsilon)
286
- at_prev = prev.get('Revenue', 0) / prev.get('Total Assets', epsilon)
287
- if at_curr > at_prev: score+=1; breakdown.append("✅ F9. Asset Turnover Up (Efficiency)")
288
- else: breakdown.append("❌ F9. Asset Turnover Down")
289
- else: breakdown.append("⚠️ F9. Asset Turnover Change cannot be calculated")
290
-
291
- return score, breakdown
292
- except Exception as e:
293
- # print(f"Error in Piotroski score: {e}") # Suppress console output
294
- return score, breakdown or ["Calculation Error: " + str(e)]
295
-
296
- # End of utils.py
 
1
  import yfinance as yf
2
  import pandas as pd
 
 
3
 
4
def fetch_financials(ticker_symbol: str, freq: str = "annual"):
    """Fetch the income statement and cash-flow statement for a ticker.

    Args:
        ticker_symbol: Symbol handed to ``yf.Ticker``.
        freq: Reporting period. ``"annual"`` (the default) and ``"yearly"``
            both request yearly statements; ``"quarterly"`` requests
            quarterly ones. Any other value is forwarded to yfinance as-is.

    Returns:
        A ``(income, cash)`` tuple. Results that arrive as dicts are
        converted to DataFrames; anything else is returned unchanged.
    """
    # NOTE(review): yfinance's get_income_stmt/get_cashflow document the
    # period names "yearly" and "quarterly"; "annual" is presumably rejected,
    # so translate the aliases this function advertises. Confirm against the
    # installed yfinance version.
    aliases = {
        "annual": "yearly",
        "annually": "yearly",
        "year": "yearly",
        "quarter": "quarterly",
    }
    period = aliases.get(freq.strip().lower(), freq) if isinstance(freq, str) else freq

    ticker = yf.Ticker(ticker_symbol)
    income = _get_statement(
        ticker, "get_income_stmt", "financials", "quarterly_financials", period
    )
    cash = _get_statement(
        ticker, "get_cashflow", "cashflow", "quarterly_cashflow", period
    )
    return income, cash


def _get_statement(ticker, getter_name, annual_attr, quarterly_attr, period):
    """Return one statement via the ``get_*`` method, else a legacy property."""
    try:
        statement = getattr(ticker, getter_name)(freq=period)
    except AttributeError:
        # Older API without get_* methods: honour the requested period
        # instead of always falling back to the annual property.
        legacy_attr = quarterly_attr if period == "quarterly" else annual_attr
        statement = getattr(ticker, legacy_attr)

    # The get_* methods can return a dict (as_dict=True); normalise it.
    if isinstance(statement, dict):
        statement = pd.DataFrame(statement)
    return statement
27
+
28
+
29
def clean_financials(df: pd.DataFrame) -> pd.DataFrame:
    """Transpose a financial statement so each column label becomes a row.

    After transposing, the new index is parsed as dates and the rows are
    sorted chronologically. If the labels cannot be parsed as dates, the
    transposed frame is returned with its labels and order untouched.

    Args:
        df: Statement to reshape. ``None`` is tolerated and yields an
            empty DataFrame.

    Returns:
        A new DataFrame; the input is not modified.
    """
    # Tolerate a missing statement so callers can pass results through
    # without a separate None check.
    if df is None:
        return pd.DataFrame()

    transposed = df.T.copy()
    try:
        as_dates = pd.to_datetime(transposed.index)
    except (ValueError, TypeError, OverflowError):
        # Labels are not date-like: best effort, keep them exactly as they are.
        return transposed

    transposed.index = as_dates
    return transposed.sort_index()