abhijitdas2821 committed on
Commit
2bf0e2b
·
verified ·
1 Parent(s): a5bbe36

Delete update_news.py

Browse files
Files changed (1) hide show
  1. update_news.py +0 -50
update_news.py DELETED
@@ -1,50 +0,0 @@
1
- import pandas as pd
2
- import requests
3
- import os
4
-
5
- API_KEY = os.getenv("GNEWS_API_KEY")
6
-
7
- URL = f"https://gnews.io/api/v4/top-headlines?category=general&lang=en&country=in&max=20&apikey={API_KEY}"
8
-
9
- def update_news_csv():
10
- # Load old dataset if exists
11
- if os.path.exists("news.csv"):
12
- df_old = pd.read_csv("news.csv")
13
- else:
14
- df_old = pd.DataFrame(columns=["title", "text", "label"])
15
-
16
- # Fetch latest real news
17
- response = requests.get(URL)
18
- data = response.json()
19
-
20
- articles = data.get("articles", [])
21
-
22
- new_rows = []
23
-
24
- for article in articles:
25
- title = article.get("title", "")
26
- text = article.get("description", "") or article.get("content", "")
27
-
28
- if title and text:
29
- new_rows.append({
30
- "title": title,
31
- "text": text,
32
- "label": "REAL"
33
- })
34
-
35
- df_new = pd.DataFrame(new_rows)
36
-
37
- # Merge old + new
38
- df = pd.concat([df_old, df_new], ignore_index=True)
39
-
40
- # Remove duplicates by title
41
- df.drop_duplicates(subset=["title"], inplace=True)
42
-
43
- # Save back
44
- df.to_csv("news.csv", index=False)
45
-
46
- print("news.csv updated successfully!")
47
- print("Total rows:", len(df))
48
-
49
- if __name__ == "__main__":
50
- update_news_csv()