Spaces:
Sleeping
Sleeping
File size: 2,212 Bytes
8afdd36 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 | import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import requests
from bs4 import BeautifulSoup
from datetime import datetime
# Scrape Screener
def scrape_screener(company):
    """Scrape the "Net Cash Flow" value for a company from screener.in.

    A headless Chrome session loads the company's consolidated page, then
    the fifth element with class ``data-table`` is searched for a row that
    contains a cell whose text is exactly ``Net Cash Flow``.

    Args:
        company: Company identifier used in the URL path. It is upper-cased
            and has spaces removed before being inserted into the URL
            (presumably a screener.in ticker symbol; verify against callers).

    Returns:
        The text of the last cell of the matching row, or ``None`` when the
        page has fewer than five data tables, no row matches, or an error
        occurs while reading the tables.

    Raises:
        Exception: Errors raised while starting the browser or navigating
            to the page are not caught here and propagate to the caller.
    """
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    options.add_argument("--disable-gpu")

    company = company.upper().replace(" ", "")
    url = f"https://www.screener.in/company/{company}/consolidated/"

    driver = webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=options,
    )
    net_cash_flow = None
    try:
        driver.get(url)
        # Fixed pause so the page has time to render before it is queried.
        time.sleep(5)
        try:
            tables = driver.find_elements(By.CLASS_NAME, 'data-table')
            if len(tables) >= 5:
                # 5th table (index 4); assumed to be the cash-flow table --
                # TODO confirm against the live page layout.
                table = tables[4]
                for row in table.find_elements(By.TAG_NAME, 'tr'):
                    cells = row.find_elements(By.TAG_NAME, 'td')
                    cell_text = [cell.text.strip() for cell in cells]
                    if 'Net Cash Flow' in cell_text:
                        # The last column of the matching row is returned.
                        net_cash_flow = cell_text[-1]
                        break
        except Exception as e:
            # Best-effort scrape: report the problem and fall back to None.
            print("Error:", e)
    finally:
        # Always shut the browser down, even when navigation fails, so no
        # headless Chrome process is left running.
        driver.quit()
    return net_cash_flow
# Scrape Economic Times
def scrape_news(company, timeout=10):
    """Scrape headlines and timestamps from an Economic Times topic page.

    Requests ``https://economictimes.indiatimes.com/topic/{company}-news``
    and reads every ``div`` with class ``contentD``. An item is kept only
    when it has both a link and a ``time`` element whose text (with
    " IST" removed) parses as ``"%d %b, %Y, %I:%M %p"``.

    Args:
        company: Topic name inserted verbatim into the URL path.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list of dicts with keys ``'Headline'`` (link text) and
        ``'Timestamp'`` (formatted as ``YYYY-MM-DD HH:MM:SS``). The list is
        empty when the response status is not 200 or nothing parses.

    Raises:
        requests.RequestException: Connection failures and timeouts from
            the HTTP request propagate to the caller.
    """
    articles = []
    url = f"https://economictimes.indiatimes.com/topic/{company}-news"
    # Without a timeout the request can block indefinitely on a stalled
    # connection.
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return articles

    soup = BeautifulSoup(response.content, 'html.parser')
    for item in soup.find_all('div', class_='contentD'):
        link = item.find('a')
        time_tag = item.find('time')
        # Skip items missing either piece instead of failing on None.
        if link is None or time_tag is None:
            continue

        headline = link.get_text(strip=True)
        raw_timestamp = time_tag.get_text(strip=True).replace(" IST", "")
        try:
            published = datetime.strptime(raw_timestamp, "%d %b, %Y, %I:%M %p")
        except ValueError:
            # Timestamp text is not in the expected format; drop the item.
            continue

        articles.append({
            'Headline': headline,
            'Timestamp': published.strftime('%Y-%m-%d %H:%M:%S'),
        })
    return articles
|