PREDICTIONSITE / scraper.py
Jitendra12421's picture
Upload 52 files
ee9a2a0 verified
Raw
History Blame Contribute Delete
3.49 kB
import requests
from bs4 import BeautifulSoup
def _clean_text(text):
    """Strip surrounding whitespace and replace the rupee sign with 'Rs.'."""
    return text.strip().replace('₹', 'Rs.')


def _parse_key_metrics(soup):
    """Return a name -> value dict built from the ``ul#top-ratios`` list."""
    metrics = {}
    top_ratios = soup.find('ul', id='top-ratios')
    if top_ratios is None:
        return metrics
    for item in top_ratios.find_all('li'):
        name_span = item.find('span', class_='name')
        value_span = item.find('span', class_='value')
        if name_span is None or value_span is None:
            continue
        # Values span several lines in the markup; collapse every run of
        # whitespace into a single space before swapping the currency sign.
        value = ' '.join(value_span.text.split()).replace('₹', 'Rs.')
        metrics[_clean_text(name_span.text)] = value
    return metrics


def _parse_pros_cons(soup):
    """Return ``(pros, cons)`` text lists from the ``section#analysis`` block."""
    pros, cons = [], []
    analysis = soup.find('section', id='analysis')
    if analysis is None:
        return pros, cons
    pros_div = analysis.find('div', class_='pros')
    if pros_div is not None:
        pros = [li.text.strip() for li in pros_div.find_all('li')]
    cons_div = analysis.find('div', class_='cons')
    if cons_div is not None:
        cons = [li.text.strip() for li in cons_div.find_all('li')]
    return pros, cons


def _parse_growth(soup):
    """Return one Metric/Period/Value dict per row of each ``ranges-table``."""
    growth = []
    for table in soup.find_all('table', class_='ranges-table'):
        heading = table.find('th')
        if heading is None:
            continue
        metric = heading.text.strip()
        # The first <tr> is skipped (it is treated as the heading row); only
        # rows with exactly two cells are recorded.
        for row in table.find_all('tr')[1:]:
            cells = row.find_all('td')
            if len(cells) == 2:
                growth.append({
                    "Metric": metric,
                    "Period": cells[0].text.strip(),
                    "Value": cells[1].text.strip()
                })
    return growth


def _parse_history(soup):
    """Return the first table of each known section, keyed by section id."""
    sections = {
        'quarters': 'Quarterly Results',
        'profit-loss': 'Profit & Loss',
        'balance-sheet': 'Balance Sheet',
        'cash-flow': 'Cash Flows',
        'ratios': 'Financial Ratios',
        'shareholding': 'Shareholding Pattern'
    }
    history = {}
    for sec_id, sec_name in sections.items():
        section = soup.find('section', id=sec_id)
        if section is None:
            continue
        table = section.find('table')
        if table is None:
            continue

        thead = table.find('thead')
        column_headers = (
            [_clean_text(th.text) for th in thead.find_all('th')] if thead else []
        )

        # A table without <tbody> used to raise AttributeError; fall back to
        # the table itself. Rows made only of <th> cells produce no columns
        # and are dropped below.
        body = table.find('tbody')
        if body is None:
            body = table

        rows = []
        for tr in body.find_all('tr'):
            cols = [_clean_text(td.text) for td in tr.find_all('td')]
            label_cell = tr.find('td', class_='text')
            if label_cell is not None:
                # Row labels can carry a '+' marker in their text; remove it.
                cols[0] = label_cell.text.replace('+', '').strip()
            if cols:
                rows.append(cols)

        # When the header row is one cell short of the data rows, label the
        # leading column. Checking ``rows`` first avoids the IndexError that
        # an empty table with header cells used to trigger.
        if rows and column_headers and len(column_headers) == len(rows[0]) - 1:
            column_headers.insert(0, 'Metric')

        history[sec_id] = {
            "title": sec_name,
            "headers": column_headers,
            "rows": rows
        }
    return history


def get_stock_info(ticker, *, timeout=10):
    """Fetch and parse a company's page from screener.in.

    The ``/consolidated/`` URL is requested first; if it answers 404 the
    company's base URL is requested instead.

    Args:
        ticker: Company symbol; it is upper-cased before use.
        timeout: Seconds passed to ``requests.get`` for each request, so a
            stalled connection cannot block the caller indefinitely.

    Returns:
        On success, a dict with the keys ``ticker``, ``key_metrics``,
        ``pros``, ``cons``, ``growth`` and ``history``. If the final response
        status is not 200, a dict with a single ``error`` key.

    Raises:
        requests.exceptions.RequestException: Propagated from
            ``requests.get`` on connection failures or timeouts.
    """
    symbol = ticker.upper()
    request_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
    }

    url = f"https://www.screener.in/company/{symbol}/consolidated/"
    response = requests.get(url, headers=request_headers, timeout=timeout)
    if response.status_code == 404:
        url = f"https://www.screener.in/company/{symbol}/"
        response = requests.get(url, headers=request_headers, timeout=timeout)

    if response.status_code != 200:
        return {"error": f"Failed to fetch data for {symbol}. Status {response.status_code}"}

    soup = BeautifulSoup(response.text, 'html.parser')
    pros, cons = _parse_pros_cons(soup)
    return {
        "ticker": symbol,
        "key_metrics": _parse_key_metrics(soup),
        "pros": pros,
        "cons": cons,
        "growth": _parse_growth(soup),
        "history": _parse_history(soup)
    }