Spaces:

Fahad10inb
/

stock_pred

Sleeping

File size: 2,212 Bytes

8afdd36

import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import requests
from bs4 import BeautifulSoup
from datetime import datetime

# Scrape Screener
def scrape_screener(company):
    """Scrape the 'Net Cash Flow' figure for *company* from screener.in.

    Opens the company's consolidated page in headless Chrome, looks at the
    fifth element with class ``data-table`` and returns the text of the last
    cell in the row that contains a cell reading exactly ``Net Cash Flow``.

    Args:
        company: Company symbol/name; it is upper-cased and spaces are
            removed before being placed in the URL.

    Returns:
        The last cell's text of the matching row as a string, or ``None``
        when the page has fewer than five data tables, no matching row is
        found, or reading the table fails (the error is printed).

    Raises:
        Whatever Selenium raises while starting the browser or loading the
        page; only failures while reading the table are swallowed.
    """
    # NOTE(review): index 4 is presumably the cash-flow table in the current
    # screener.in layout — confirm if the site markup changes.
    cash_flow_table_index = 4

    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    options.add_argument("--disable-gpu")
    company = company.upper().replace(" ", "")

    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

    url = f"https://www.screener.in/company/{company}/consolidated/"
    net_cash_flow = None

    # try/finally guarantees the browser process is shut down even when
    # navigation or scraping raises; otherwise every failure leaks a Chrome.
    try:
        driver.get(url)

        # Fixed pause to let the page finish rendering before querying it.
        time.sleep(5)

        try:
            tables = driver.find_elements(By.CLASS_NAME, 'data-table')
            if len(tables) > cash_flow_table_index:
                table = tables[cash_flow_table_index]
                rows = table.find_elements(By.TAG_NAME, 'tr')

                for row in rows:
                    cells = row.find_elements(By.TAG_NAME, 'td')
                    cell_text = [cell.text.strip() for cell in cells]

                    # Exact match on a cell's text, not a substring search.
                    if 'Net Cash Flow' in cell_text:
                        net_cash_flow = cell_text[-1]
                        break
        except Exception as e:
            # Best-effort: a scraping failure yields None rather than a crash.
            print("Error:", e)
    finally:
        driver.quit()

    return net_cash_flow

# Scrape Economic Times
def scrape_news(company, timeout=10):
    """Scrape headlines about *company* from the Economic Times topic page.

    Args:
        company: Topic name inserted verbatim into the
            ``/topic/{company}-news`` URL.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout a stalled server would block forever.

    Returns:
        A list of dicts with keys ``'Headline'`` (link text) and
        ``'Timestamp'`` (formatted ``YYYY-MM-DD HH:MM:SS``). The list is
        empty when the response status is not 200. Result blocks without a
        link, without a ``<time>`` element, or with a timestamp that does not
        match the expected format are skipped.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    articles = []
    url = f"https://economictimes.indiatimes.com/topic/{company}-news"
    response = requests.get(url, timeout=timeout)

    if response.status_code != 200:
        return articles

    soup = BeautifulSoup(response.content, 'html.parser')
    news = soup.find_all('div', class_='contentD')

    for item in news:
        link = item.find('a')
        time_tag = item.find('time')

        # find() returns None when the tag is absent; skip incomplete blocks
        # instead of crashing and losing every other article.
        if link is None or time_tag is None:
            continue

        headline = link.get_text(strip=True)
        raw_timestamp = time_tag.get_text(strip=True).replace(" IST", "")
        try:
            # Expected shape after stripping " IST": "05 Jan, 2024, 09:30 AM".
            parsed = datetime.strptime(raw_timestamp, "%d %b, %Y, %I:%M %p")
        except ValueError:
            continue

        articles.append({
            'Headline': headline,
            'Timestamp': parsed.strftime('%Y-%m-%d %H:%M:%S')
        })

    return articles