Spaces:
Runtime error
Runtime error
Create csv_util.py
Browse files- csv_util.py +66 -0
csv_util.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# csv_util.py
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import logging
|
| 6 |
+
|
| 7 |
+
# Configure the root logger when this module is imported: INFO threshold,
# records rendered as "<timestamp> [<LEVEL>] <logger name>: <message>".
# NOTE: basicConfig acts on the root logger, so it applies process-wide and is
# a no-op if the root logger already has handlers.
logging.basicConfig(
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    level=logging.INFO,
)

# Module-scoped logger, named after this module.
logger = logging.getLogger(__name__)

# Canonical column order used for every CSV this module reads or writes.
CSV_COLUMNS = [
    "DateTime",
    "Currency",
    "Impact",
    "Event",
    "Actual",
    "Forecast",
    "Previous",
    "Detail",
]
|
| 15 |
+
|
| 16 |
+
def ensure_csv_header(csv_file):
    """Make sure ``csv_file`` exists and starts with the standard header row.

    If the path does not exist, or exists with a size of zero bytes, a CSV
    containing only the ``CSV_COLUMNS`` header is written to it. A file that
    already has content is left untouched.

    Args:
        csv_file: Path of the CSV file to check or create.
    """
    already_populated = os.path.exists(csv_file) and os.path.getsize(csv_file) > 0
    if already_populated:
        return

    # An empty frame with named columns serialises to just the header line.
    header_only = pd.DataFrame(columns=CSV_COLUMNS)
    header_only.to_csv(csv_file, index=False)
|
| 19 |
+
|
| 20 |
+
def read_existing_data(csv_file):
    """Load ``csv_file`` into a DataFrame restricted to ``CSV_COLUMNS``.

    The file is created with a header row first if it is missing or empty.
    Every column is read as text (``dtype=str``). Any column from
    ``CSV_COLUMNS`` that the file lacks is added filled with empty strings,
    and columns not in ``CSV_COLUMNS`` are dropped from the result.

    Args:
        csv_file: Path of the CSV file to read.

    Returns:
        A DataFrame whose columns are exactly ``CSV_COLUMNS`` in that order.
        If creating, reading or normalising the file fails, the error is
        logged and an empty DataFrame with those columns is returned.
    """
    try:
        ensure_csv_header(csv_file)
        df = pd.read_csv(csv_file, dtype=str)
        for col in CSV_COLUMNS:
            if col not in df.columns:
                df[col] = ""
        return df[CSV_COLUMNS]
    except Exception:
        # Best-effort read: keep returning an empty frame so callers continue,
        # but record the failure. Without this, a corrupt or unreadable file
        # is indistinguishable from an empty one and may be silently
        # overwritten by a later write.
        logger.exception("Failed to read CSV file %s; treating it as empty", csv_file)
        return pd.DataFrame(columns=CSV_COLUMNS)
|
| 30 |
+
|
| 31 |
+
def write_data_to_csv(df: pd.DataFrame, csv_file: str):
    """Write ``df`` to ``csv_file`` with rows ordered by the ``DateTime`` column.

    Rows are sorted ascending on the values stored in ``DateTime`` (if that
    column holds strings the order is lexicographic). The caller's frame is
    not modified; the sorted copy is written without the index column.

    Args:
        df: Data to persist; must contain a ``DateTime`` column.
        csv_file: Destination path, overwritten if it already exists.
    """
    chronological = df.sort_values("DateTime")
    chronological.to_csv(csv_file, index=False)
|
| 34 |
+
|
| 35 |
+
def merge_new_data(existing_df, new_df):
    """Merge freshly collected rows into the already stored rows.

    Rows are identified by a key built from the stripped ``DateTime``,
    ``Currency`` and ``Event`` values joined with underscores.

    * A new row whose key already exists only contributes its ``Detail``:
      it is copied (stripped) into the stored row when the stored ``Detail``
      is missing or blank and the new one is not. All other stored values
      are kept as they are.
    * A new row whose key is unknown is appended after the stored rows.

    Duplicate keys are tolerated: if ``existing_df`` contains the same key
    more than once, the first occurrence is the one that gets updated (all
    stored rows are kept), and if ``new_df`` repeats an unknown key only one
    row is appended for it.

    Args:
        existing_df: Rows already stored; needs the ``CSV_COLUMNS`` columns.
        new_df: Newly collected rows with the same columns.

    Returns:
        ``new_df`` itself, unmodified, when ``existing_df`` is empty.
        Otherwise a new DataFrame with a fresh 0..n-1 index whose columns are
        exactly ``CSV_COLUMNS``. Neither input frame is mutated.
    """
    if existing_df.empty:
        return new_df

    def add_unique_key(df):
        # Work on a copy so the caller's frame is never modified.
        keyed = df.copy()
        keyed["unique_key"] = (
            keyed["DateTime"].astype(str).str.strip() + "_"
            + keyed["Currency"].astype(str).str.strip() + "_"
            + keyed["Event"].astype(str).str.strip()
        )
        return keyed.set_index("unique_key")

    def clean_detail(value):
        # Treat NaN/None as "no detail"; otherwise compare on stripped text.
        return str(value).strip() if pd.notna(value) else ""

    existing_df = add_unique_key(existing_df)
    new_df = add_unique_key(new_df)

    # Position of the first stored row for each key. Addressing rows by
    # position keeps lookups scalar even when the key index is not unique
    # (label-based ``.at`` would return a Series there and break the checks).
    first_position = {}
    for position, key in enumerate(existing_df.index):
        first_position.setdefault(key, position)
    detail_col = existing_df.columns.get_loc("Detail")

    # Rows to append, keyed so a key repeated inside new_df is added once.
    pending = {}
    for key, new_row in new_df.iterrows():
        new_detail = clean_detail(new_row["Detail"])
        if key in first_position:
            position = first_position[key]
            stored_detail = clean_detail(existing_df.iat[position, detail_col])
            if not stored_detail and new_detail:
                existing_df.iat[position, detail_col] = new_detail
        elif key not in pending:
            pending[key] = new_row
        elif new_detail and not clean_detail(pending[key]["Detail"]):
            # A later duplicate may carry the detail the first one lacked.
            pending[key]["Detail"] = new_detail

    if pending:
        new_rows_df = pd.DataFrame(list(pending.values()))
        existing_df = pd.concat([existing_df, new_rows_df])

    merged_df = existing_df.reset_index(drop=True)
    return merged_df[CSV_COLUMNS]
|