Badumetsibb committed on
Commit
a1409c3
·
verified ·
1 Parent(s): c2eaf87

Create csv_util.py

Browse files
Files changed (1) hide show
  1. csv_util.py +66 -0
csv_util.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # csv_util.py
2
+
3
+ import os
4
+ import pandas as pd
5
+ import logging
6
+
7
# Configure the root logger at import time: INFO and above, each record
# rendered as "<timestamp> [LEVEL] <logger name>: <message>".
logging.basicConfig(
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    level=logging.INFO,
)

# Logger named after this module.
logger = logging.getLogger(__name__)

# Columns, in order, used for the CSV header and for every frame this
# module returns.
CSV_COLUMNS = [
    "DateTime",
    "Currency",
    "Impact",
    "Event",
    "Actual",
    "Forecast",
    "Previous",
    "Detail",
]
15
+
16
def ensure_csv_header(csv_file):
    """Make sure ``csv_file`` exists and starts with the CSV_COLUMNS header.

    A file that already exists with a non-zero size is left untouched;
    a missing or zero-byte file is (re)written with the header row only.
    """
    already_populated = os.path.exists(csv_file) and os.path.getsize(csv_file) != 0
    if already_populated:
        return

    header_only = pd.DataFrame(columns=CSV_COLUMNS)
    header_only.to_csv(csv_file, index=False)
19
+
20
def read_existing_data(csv_file):
    """Load ``csv_file`` as an all-string DataFrame laid out as CSV_COLUMNS.

    The file is created with a header row first if it is missing or empty.
    Columns absent from the file are added as empty strings, and the result
    contains exactly the CSV_COLUMNS columns in that order.

    Reading is best-effort: on any failure an empty frame with the expected
    columns is returned instead of raising. The failure is logged with its
    traceback so that an unreadable file does not go unnoticed (a caller
    that merges into the returned frame and writes it back would otherwise
    replace the file's previous contents without any trace).
    """
    ensure_csv_header(csv_file)
    try:
        df = pd.read_csv(csv_file, dtype=str)
        for col in CSV_COLUMNS:
            if col not in df.columns:
                df[col] = ""
        return df[CSV_COLUMNS]
    except Exception:
        logger.exception(
            "Failed to read %s; falling back to an empty table", csv_file
        )
        return pd.DataFrame(columns=CSV_COLUMNS)
30
+
31
def write_data_to_csv(df: pd.DataFrame, csv_file: str):
    """Write ``df`` to ``csv_file`` with rows ordered by ascending ``DateTime``.

    Sorting produces a new frame, so the caller's ``df`` keeps its row
    order. The index is not written to the file.
    """
    df.sort_values("DateTime").to_csv(csv_file, index=False)
34
+
35
def merge_new_data(existing_df, new_df):
    """Merge ``new_df`` into ``existing_df`` without duplicating events.

    Rows are identified by a key built from the stripped ``DateTime``,
    ``Currency`` and ``Event`` values joined with underscores.

    * A new row whose key already exists is not appended; its ``Detail``
      is copied into the matching existing row(s) only where their
      ``Detail`` is blank or missing.
    * A new row with an unseen key is appended. If the same key occurs
      again later in ``new_df``, only the first occurrence is kept, and a
      later non-blank ``Detail`` fills it in if the first one was blank.

    Neither input frame is modified.

    Returns:
        ``new_df`` itself when ``existing_df`` is empty; otherwise a new
        frame with a fresh 0..n-1 index holding exactly the CSV_COLUMNS
        columns, existing rows first, then the appended rows in the order
        they were first seen.
    """
    if existing_df.empty:
        return new_df
    if new_df.empty:
        # Nothing to merge; also covers a column-less empty frame, which
        # would otherwise fail while building the row keys.
        return existing_df.reset_index(drop=True)[CSV_COLUMNS]

    def _row_keys(df):
        # One "<DateTime>_<Currency>_<Event>" string per row.
        return (
            df["DateTime"].astype(str).str.strip() + "_" +
            df["Currency"].astype(str).str.strip() + "_" +
            df["Event"].astype(str).str.strip()
        )

    def _clean(value):
        # Missing values count as blank text.
        return "" if pd.isna(value) else str(value).strip()

    # Work on a positionally indexed copy: labels 0..n-1 are unique, so
    # .at always yields a scalar even when several rows share a key.
    merged = existing_df.copy()
    merged.reset_index(drop=True, inplace=True)

    positions_by_key = {}
    for pos, key in enumerate(_row_keys(merged)):
        positions_by_key.setdefault(key, []).append(pos)

    pending = {}  # key -> row to append, in first-seen order
    for key, (_, row) in zip(_row_keys(new_df), new_df.iterrows()):
        if key in positions_by_key:
            incoming = None
            for pos in positions_by_key[key]:
                if _clean(merged.at[pos, "Detail"]):
                    continue
                if incoming is None:
                    incoming = _clean(row["Detail"])
                if incoming:
                    merged.at[pos, "Detail"] = incoming
        elif key in pending:
            # Repeated key inside the same batch: keep the first row and
            # only let the repeat supply a Detail that was blank.
            kept = pending[key]
            if "Detail" in kept.index and not _clean(kept["Detail"]):
                incoming = _clean(row["Detail"])
                if incoming:
                    kept["Detail"] = incoming
        else:
            # Copy so later in-batch fills never touch the caller's frame.
            pending[key] = row.copy()

    if pending:
        merged = pd.concat([merged, pd.DataFrame(list(pending.values()))])
    return merged.reset_index(drop=True)[CSV_COLUMNS]