|
|
""" |
|
|
data_utils.py - Hybrid data utilities (Colab/Streamlit friendly) |
|
|
|
|
|
Option B implementation: |
|
|
- Historical data: uses yfinance.history(...) (good for building ML features) |
|
|
- Live/current quote: uses NSE or BSE public APIs (when available) for up-to-date quotes |
|
|
- Fallbacks: if the exchange APIs fail, fetch_live_quote falls back to yfinance (best-effort, possibly delayed) and returns an empty dict only if that fails too
|
|
- Also includes a simple fetch_news() placeholder and create_pdf_report() |
|
|
|
|
|
Usage: |
|
|
from data_utils import fetch_historical_data, fetch_live_quote, fetch_news, create_pdf_report |
|
|
|
|
|
Notes: |
|
|
- For NSE calls we use a requests.Session with common headers (NSE sometimes blocks non-browser agents). |
|
|
- For BSE calls we use the public BSE JSON endpoint (uses numeric security code or symbol depending on CSV). |
|
|
- This file assumes tickers in Yahoo format: e.g., "RELIANCE.NS" (NSE) or "500325.BO" (BSE). |
|
|
""" |
|
|
|
|
|
import os
import re
import time
from datetime import datetime
from typing import Any, Dict, Optional, Tuple

import matplotlib.pyplot as plt
import pandas as pd
import requests
import yfinance as yf
from fpdf import FPDF
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _parse_ticker(ticker: str) -> (str, Optional[str]): |
|
|
""" |
|
|
Normalize ticker string and return (base, suffix) |
|
|
Examples: |
|
|
"RELIANCE.NS" -> ("RELIANCE", "NS") |
|
|
"500325.BO" -> ("500325", "BO") |
|
|
"RELIANCE" -> ("RELIANCE", None) |
|
|
""" |
|
|
if not isinstance(ticker, str): |
|
|
return ("", None) |
|
|
t = ticker.strip() |
|
|
if "." in t: |
|
|
parts = t.split(".") |
|
|
base = ".".join(parts[:-1]).upper() |
|
|
suffix = parts[-1].upper() |
|
|
return (base, suffix) |
|
|
return (t.upper(), None) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def fetch_historical_data(ticker: str, start_date: Optional[str], end_date: Optional[str]):
    """
    Download daily OHLCV history for a ticker through yfinance.

    - ticker: Yahoo-style symbol, e.g. 'RELIANCE.NS' or '^NSEI'
    - start_date, end_date: 'YYYY-MM-DD' strings or None

    Returns a DataFrame restricted to the Open, High, Low, Close and Volume
    columns with a datetime index. Any failure (or an empty download) is
    reported as an empty DataFrame; the error is printed, not raised.
    """
    try:
        history = yf.Ticker(ticker).history(
            start=start_date,
            end=end_date or None,  # falsy end dates are passed on as None
            interval="1d",
            auto_adjust=False,
        )
        if history is None or history.empty:
            return pd.DataFrame()

        ohlcv = history[['Open', 'High', 'Low', 'Close', 'Volume']]
        ohlcv.index = pd.to_datetime(ohlcv.index)
        return ohlcv
    except Exception as err:
        print("fetch_historical_data error:", err)
        return pd.DataFrame()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _get_nse_quote(symbol: str, session: Optional[requests.Session] = None) -> Dict[str, Any]:
    """
    Get a live quote for an NSE symbol from the NSE public JSON endpoint.

    symbol:  plain symbol like 'RELIANCE' (no .NS suffix)
    session: optional requests.Session to reuse; browser-like headers are
             set on it. When omitted, a temporary session is created and
             closed before returning.

    Returns a dict with keys: symbol, LTP, DayHigh, DayLow, PrevClose,
    Volume, timestamp. Returns {} when the symbol is empty, the request
    fails, or the response carries no last price, so callers can fall back
    to another data source.
    """
    if not symbol:
        return {}

    home_url = "https://www.nseindia.com"
    api_url = "https://www.nseindia.com/api/quote-equity"

    owns_session = session is None
    s = requests.Session() if owns_session else session
    s.headers.update({
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "Accept-Language": "en-US,en;q=0.9",
        "Accept": "application/json, text/plain, */*",
        "Referer": "https://www.nseindia.com/",
    })

    try:
        # Visit the home page first so the session picks up any cookies it
        # sets before the API call is made.
        s.get(home_url, timeout=5)
        time.sleep(0.1)

        # Pass the symbol via `params` so requests URL-encodes it; symbols
        # containing '&' (e.g. "M&M") would otherwise corrupt the query string.
        r = s.get(api_url, params={"symbol": symbol}, timeout=5)
        r.raise_for_status()
        j = r.json()

        # Every section is guarded with `or {}` because a key may be present
        # with a null value.
        info = j.get("priceInfo") or {}
        secinfo = j.get("securityInfo") or {}
        intra = info.get("intraDayHighLow") or {}
        metadata = j.get("metadata") or {}
        trade = j.get("tradeInfo") or {}

        ltp = info.get("lastPrice")
        if ltp is None:
            # A payload without a price is not a usable quote.
            return {}

        # NOTE(review): priceInfo is believed to expose "previousClose";
        # "close" is kept as a fallback -- confirm against a live payload.
        prev_close = info.get("previousClose")
        if prev_close is None:
            prev_close = info.get("close")

        return {
            "symbol": metadata.get("symbol", symbol),
            "LTP": ltp,
            "DayHigh": intra.get("max"),
            "DayLow": intra.get("min"),
            "PrevClose": prev_close,
            "Volume": secinfo.get("totalTradedVolume") or trade.get("totalTradedVolume"),
            "timestamp": datetime.now().isoformat(),
        }
    except Exception:
        # Deliberate best-effort: any network/parsing problem means "no quote".
        return {}
    finally:
        if owns_session:
            s.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _get_bse_quote(security_code: str) -> Dict[str, Any]: |
|
|
""" |
|
|
Get live quote for BSE security code using BSE public API. |
|
|
security_code: numeric code like '500325' OR alphanumeric symbol sometimes works |
|
|
Returns a dict with keys: LTP, DayHigh, DayLow, PrevClose, Volume, timestamp (if found) |
|
|
""" |
|
|
if not security_code: |
|
|
return {} |
|
|
url = ("https://api.bseindia.com/BseIndiaAPI/api/GetStkQuote/w" |
|
|
f"?flag=EQ&securitycode={security_code}") |
|
|
try: |
|
|
r = requests.get(url, timeout=5) |
|
|
r.raise_for_status() |
|
|
j = r.json() |
|
|
|
|
|
cr = j.get("CurrRate", {}) or {} |
|
|
result = { |
|
|
"symbol": cr.get("Scripname") or security_code, |
|
|
"LTP": cr.get("LTP") or cr.get("LastPrice"), |
|
|
"Open": cr.get("Open"), |
|
|
"DayHigh": cr.get("High"), |
|
|
"DayLow": cr.get("Low"), |
|
|
"PrevClose": cr.get("PreviousClose"), |
|
|
"Volume": cr.get("TotalTradedQuantity") or cr.get("TradedQty"), |
|
|
"timestamp": datetime.now().isoformat() |
|
|
} |
|
|
return result |
|
|
except Exception as e: |
|
|
|
|
|
return {} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def fetch_live_quote(ticker: str) -> Dict[str, Any]:
    """
    Public function to get a live/current quote for a ticker.

    - ticker: yahoo-style e.g., 'RELIANCE.NS' or '500325.BO' or 'RELIANCE'
      (best-effort)

    Returns a dictionary with live fields, or an empty dict if no source
    produced a price (or the ticker is blank / not a string).

    Strategy:
    - parse the ticker suffix (.NS / .BO)
    - try the matching exchange first (BSE for .BO, otherwise NSE), then the
      other exchange; each exchange is queried at most once
    - fallback: yfinance Ticker.info (may be delayed)
    """
    base, suffix = _parse_ticker(ticker)
    if not base:
        # Non-string or blank ticker: there is nothing to look up.
        return {}

    def _from_nse() -> Dict[str, Any]:
        # The context manager closes the session once the lookup is done.
        with requests.Session() as s:
            return _get_nse_quote(base, session=s)

    def _from_bse() -> Dict[str, Any]:
        return _get_bse_quote(base)

    # BSE goes first for ".BO" tickers and for suffix-less tickers ending in
    # "BO"; every other ticker tries NSE first.
    bse_first = suffix == "BO" or (suffix is None and base.endswith("BO"))
    lookups = (_from_bse, _from_nse) if bse_first else (_from_nse, _from_bse)

    for lookup in lookups:
        quote = lookup()
        if quote:
            return quote

    # Last resort: yfinance.
    try:
        info = yf.Ticker(ticker).info or {}
        # Prefer actual current prices; the previous close is only used when
        # no current price is available.
        ltp = (
            info.get("regularMarketPrice")
            or info.get("currentPrice")
            or info.get("previousClose")
        )
        if ltp is None:
            return {}
        return {
            "symbol": base,
            "LTP": ltp,
            "DayHigh": info.get("dayHigh"),
            "DayLow": info.get("dayLow"),
            "PrevClose": info.get("previousClose"),
            "Volume": info.get("volume"),
            "timestamp": datetime.now().isoformat(),
        }
    except Exception:
        # Deliberate best-effort: a failing fallback means "no quote".
        return {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def fetch_news(query: str, ticker: Optional[str] = None):
    """
    Return demo news items for `query` (placeholder implementation).

    No news service is contacted; swap this out for NewsAPI or another
    provider to get real headlines. `ticker` is accepted but not used.

    Returns a list of two dicts with keys: title, source, summary.
    """
    demo_source = "DemoNews"
    headlines = (
        (
            f"Latest update about {query}",
            "Replace with a real news API (NewsAPI/TwelveData news etc).",
        ),
        (
            f"{query} quarterly results announced",
            "Demo placeholder.",
        ),
    )
    return [
        {"title": title, "source": demo_source, "summary": summary}
        for title, summary in headlines
    ]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def create_pdf_report(path: str, ticker: str, df: pd.DataFrame, live_info: Optional[Dict[str, Any]] = None):
    """
    Create a simple PDF report including:
    - Close price time series plot
    - Histogram of daily returns
    - Optionally a small live quote box

    path: output pdf file path (e.g., 'report_RELIANCE_NS.pdf'); the parent
          directory is created if it does not exist
    ticker: ticker string used for headings
    df: DataFrame from fetch_historical_data (must contain a 'Close' column)
    live_info: optional dict returned by fetch_live_quote

    Raises ValueError when df is None or empty.
    """
    # Local import so this function does not depend on the module's
    # top-level import list.
    import tempfile

    if df is None or df.empty:
        raise ValueError("DataFrame is empty; cannot create report.")

    out_dir = os.path.dirname(path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # The chart images go into a private temporary directory: concurrent
    # calls cannot overwrite each other's files, and the files are removed
    # even if plotting or PDF generation raises.
    with tempfile.TemporaryDirectory() as tmp_dir:
        price_png = os.path.join(tmp_dir, "price.png")
        hist_png = os.path.join(tmp_dir, "hist.png")

        # Close price chart.
        fig = plt.figure(figsize=(8, 3))
        try:
            df['Close'].plot(title=f"{ticker} Close Price")
            plt.tight_layout()
            plt.savefig(price_png)
        finally:
            plt.close(fig)

        # Histogram of day-over-day percentage changes of Close.
        returns = df['Close'].pct_change().dropna()
        fig = plt.figure(figsize=(8, 3))
        try:
            returns.hist(bins=40)
            plt.title("Histogram of daily returns")
            plt.tight_layout()
            plt.savefig(hist_png)
        finally:
            plt.close(fig)

        pdf = FPDF()
        pdf.add_page()
        pdf.set_font("Arial", size=14)
        pdf.cell(0, 10, f"Report - {ticker}", ln=True)
        pdf.set_font("Arial", size=10)
        pdf.ln(4)
        pdf.cell(0, 8, f"Data rows: {len(df)}", ln=True)
        pdf.ln(4)

        # Optional live snapshot box.
        if live_info:
            pdf.set_font("Arial", size=10)
            pdf.cell(0, 7, f"Live snapshot ({live_info.get('timestamp','')})", ln=True)
            pdf.set_font("Arial", size=9)
            pdf.cell(0, 6, f"LTP: {live_info.get('LTP')} DayHigh: {live_info.get('DayHigh')} DayLow: {live_info.get('DayLow')}", ln=True)
            pdf.cell(0, 6, f"PrevClose: {live_info.get('PrevClose')} Volume: {live_info.get('Volume')}", ln=True)
            pdf.ln(6)

        pdf.image(price_png, w=180)
        pdf.ln(6)
        pdf.image(hist_png, w=180)
        pdf.ln(6)

        # Write the PDF while the image files still exist.
        pdf.output(path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: one historical download followed by one live quote
    # per exchange.
    sample_history = fetch_historical_data("RELIANCE.NS", "2022-01-01", None)
    print("Historical sample (RELIANCE.NS): rows ->", len(sample_history))

    nse_quote = fetch_live_quote("RELIANCE.NS")
    print("Live NSE sample for RELIANCE:", nse_quote)

    bse_quote = fetch_live_quote("500325.BO")
    print("Live BSE sample for 500325:", bse_quote)
|
|
|