Spaces:

Fahad10inb
/

stock_pred

Sleeping

App Files Files Community

stock_pred / backend /utils /WebScraper.py

Fahad10inb

Backend changes

8afdd36 11 months ago

raw

history blame contribute delete

2.21 kB

	import time
	from selenium import webdriver
	from selenium.webdriver.common.by import By
	from selenium.webdriver.chrome.service import Service
	from webdriver_manager.chrome import ChromeDriverManager
	import requests
	from bs4 import BeautifulSoup
	from datetime import datetime

	# Scrape Screener
	def scrape_screener(company):
	    """Scrape the "Net Cash Flow" value for a company from screener.in.

	    Opens the company's consolidated page in headless Chrome, takes the
	    fifth element with class ``data-table`` and returns the last cell of
	    the first row that contains a cell whose text is exactly
	    ``Net Cash Flow``.

	    Args:
	        company: Company symbol. It is upper-cased and stripped of spaces
	            before being placed in the URL.

	    Returns:
	        The text of the last cell of the matching row, or ``None`` when the
	        page has fewer than five data tables, no row matches, or reading
	        the tables raises (the error is printed, not re-raised).

	    Raises:
	        Whatever ``webdriver.Chrome(...)`` or ``driver.get`` raise; those
	        calls are not covered by the best-effort handler.
	    """
	    options = webdriver.ChromeOptions()
	    options.add_argument("--headless")
	    options.add_argument("--disable-gpu")
	    company = company.upper().replace(" ", "")
	    url = f"https://www.screener.in/company/{company}/consolidated/"

	    net_cash_flow = None

	    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
	    try:
	        driver.get(url)

	        # Fixed pause before reading the tables.
	        # NOTE(review): presumably gives client-side rendering time to finish - confirm.
	        time.sleep(5)

	        try:
	            tables = driver.find_elements(By.CLASS_NAME, 'data-table')
	            if len(tables) >= 5:
	                # 5th table (index 4).
	                # NOTE(review): presumably the cash-flow table - verify against the page.
	                rows = tables[4].find_elements(By.TAG_NAME, 'tr')

	                for row in rows:
	                    cells = row.find_elements(By.TAG_NAME, 'td')
	                    cell_text = [cell.text.strip() for cell in cells]

	                    if 'Net Cash Flow' in cell_text:
	                        # Last column of the matching row.
	                        net_cash_flow = cell_text[-1]
	                        break
	        except Exception as e:
	            # Best-effort scrape: report the problem and fall through to None.
	            print("Error:", e)
	    finally:
	        # Always shut the browser down, even when navigation fails, so no
	        # Chrome/chromedriver process is left running.
	        driver.quit()

	    return net_cash_flow

	# Scrape Economic Times
	def scrape_news(company, timeout=10):
	    """Scrape headlines for a company from the Economic Times topic page.

	    Requests ``https://economictimes.indiatimes.com/topic/{company}-news``
	    and collects one entry per ``div.contentD`` element that has both a
	    link and a parseable ``<time>`` element.

	    Args:
	        company: Topic name inserted verbatim into the URL.
	        timeout: Seconds to wait for the HTTP request before giving up, so
	            a stalled server cannot block the caller indefinitely.

	    Returns:
	        A list of dicts with keys ``'Headline'`` (link text) and
	        ``'Timestamp'`` (formatted as ``%Y-%m-%d %H:%M:%S``). The list is
	        empty when the response status is not 200 or nothing matches.

	    Raises:
	        requests.RequestException: If the HTTP request itself fails
	            (including a timeout).
	    """
	    articles = []
	    url = f"https://economictimes.indiatimes.com/topic/{company}-news"
	    response = requests.get(url, timeout=timeout)

	    if response.status_code != 200:
	        return articles

	    soup = BeautifulSoup(response.content, 'html.parser')
	    news = soup.find_all('div', class_='contentD')

	    for item in news:
	        link = item.find('a')
	        time_tag = item.find('time')

	        # Skip entries missing either piece instead of failing on None.
	        if link is None or time_tag is None:
	            continue

	        headline = link.get_text(strip=True)
	        raw_timestamp = time_tag.get_text(strip=True).replace(" IST", "")

	        try:
	            published = datetime.strptime(raw_timestamp, "%d %b, %Y, %I:%M %p")
	        except ValueError:
	            # Timestamp not in the expected "01 Jan, 2024, 10:30 AM" form.
	            continue

	        articles.append({
	            'Headline': headline,
	            'Timestamp': published.strftime('%Y-%m-%d %H:%M:%S')
	        })

	    return articles