eshan6704 committed on
Commit
7ac013c
·
verified ·
1 Parent(s): 5c589e1

Update csvloader.py

Browse files
Files changed (1) hide show
  1. csvloader.py +50 -40
csvloader.py CHANGED
@@ -1,56 +1,49 @@
1
- # CSV.py
2
-
3
- import pandas as pd
4
  import requests
5
  import zipfile
6
- from io import BytesIO
7
  from datetime import datetime as dt
8
- from typing import List, Union
 
 
9
 
10
  from persist import exists, load, save
11
 
12
 
13
- def load_csv(
14
- url: str,
15
- header_row: int = 0,
16
- text_cols: List[str] | None = None
17
- ) -> Union[pd.DataFrame, List[pd.DataFrame]]:
18
  """
19
- Load CSV or ZIP containing CSVs
20
- - .csv -> DataFrame
21
- - .zip -> List[DataFrame]
 
 
 
 
22
  """
23
- text_cols = text_cols or []
24
-
25
- def _clean_df(df: pd.DataFrame) -> pd.DataFrame:
26
- df = df.loc[:, ~df.columns.str.contains("^Unnamed")]
27
- df.columns = (
28
- df.columns
29
- .str.strip()
30
- .str.replace(" ", "_")
31
- .str.replace("-", "_")
32
- )
33
- for col in df.columns:
34
- if col not in text_cols:
35
- df[col] = pd.to_numeric(df[col], errors="coerce")
36
- return df.dropna(how="all")
37
 
38
  if url.lower().endswith(".zip"):
39
- r = requests.get(url)
40
- r.raise_for_status()
41
  z = zipfile.ZipFile(BytesIO(r.content))
42
- dfs = []
 
43
  for name in z.namelist():
44
  if name.lower().endswith(".csv"):
45
  with z.open(name) as f:
46
- df = pd.read_csv(f, header=header_row)
47
- dfs.append(_clean_df(df))
48
- return dfs
49
 
50
- df = pd.read_csv(url, header=header_row)
51
- return _clean_df(df)
52
 
53
 
 
 
 
54
  def _highlow_html_formatter(df: pd.DataFrame, date_str: str) -> str:
55
  metric = "PERCENT_CHANGE"
56
  df_html = df.copy()
@@ -62,17 +55,20 @@ def _highlow_html_formatter(df: pd.DataFrame, date_str: str) -> str:
62
  for col in df_html.columns:
63
  val = row[col]
64
  style = ""
 
65
  if isinstance(val, (int, float)):
66
  txt = f"{val:.2f}"
67
  if val > 0:
68
  style = "pos"
69
  elif val < 0:
70
  style = "neg"
 
71
  if col == metric:
72
  if idx in top_up:
73
  style += " top-up"
74
  elif idx in top_dn:
75
  style += " top-down"
 
76
  df_html.at[idx, col] = f'<span class="{style.strip()}">{txt}</span>'
77
  else:
78
  df_html.at[idx, col] = str(val)
@@ -102,10 +98,17 @@ th {{ background:#222; color:white; }}
102
  """
103
 
104
 
 
 
 
105
  def nse_highlow(date_str: str | None = None) -> str:
106
  """
107
  Master NSE High-Low function
108
- - Uses load_csv
 
 
 
 
109
  - Builds HTML
110
  - Persists ONLY HTML
111
  """
@@ -123,12 +126,19 @@ def nse_highlow(date_str: str | None = None) -> str:
123
  f"ind_close_all_{d.strftime('%d%m%Y')}.csv"
124
  )
125
 
126
- df = load_csv(
127
- url=url,
128
- header_row=2,
129
- text_cols=["Index_Name", "Index_Date"]
 
 
 
130
  )
131
 
 
132
  html = _highlow_html_formatter(df, date_str)
 
 
133
  save(cache_key, html, "html")
 
134
  return html
 
1
+ # ==============================
2
+ # Imports
3
+ # ==============================
4
  import requests
5
  import zipfile
6
+ from io import BytesIO, StringIO
7
  from datetime import datetime as dt
8
+ from typing import Dict, Union
9
+
10
+ import pandas as pd
11
 
12
  from persist import exists, load, save
13
 
14
 
15
+ # ==============================
16
+ # Raw CSV Loader (NO parsing)
17
+ # ==============================
18
def load_csv(url: str, timeout: float = 30.0) -> Union[str, Dict[str, str]]:
    """
    Pure transport loader: download ``url`` and return its content unparsed.

    - .zip -> {member filename: raw CSV text} for every archive member
      whose name ends in ".csv" (case-insensitive)
    - anything else -> raw response text (str)

    NO parsing
    NO cleaning
    NO assumptions

    Args:
        url: Address to download. A case-insensitive ".zip" suffix selects
            archive handling; every other URL is returned as text.
        timeout: Seconds to wait on the server before giving up.

    Returns:
        The response body as text, or a dict mapping each CSV member name
        of the downloaded archive to its decoded text.

    Raises:
        requests.HTTPError: the server answered with an error status.
        requests.Timeout: the server did not respond within ``timeout``.
        zipfile.BadZipFile: a ".zip" URL did not return a valid archive.
    """
    # Without an explicit timeout, requests waits indefinitely on a
    # stalled server and the caller never regains control.
    r = requests.get(url, timeout=timeout)
    r.raise_for_status()

    if not url.lower().endswith(".zip"):
        return r.text

    out: Dict[str, str] = {}
    # The context manager guarantees the archive is closed even if a
    # member fails to read.
    with zipfile.ZipFile(BytesIO(r.content)) as z:
        for name in z.namelist():
            if name.lower().endswith(".csv"):
                # Best effort: bytes that are not valid UTF-8 are dropped
                # rather than raising.
                out[name] = z.read(name).decode("utf-8", errors="ignore")
    return out
 
42
 
43
 
44
+ # ==============================
45
+ # NSE High-Low HTML Formatter
46
+ # ==============================
47
  def _highlow_html_formatter(df: pd.DataFrame, date_str: str) -> str:
48
  metric = "PERCENT_CHANGE"
49
  df_html = df.copy()
 
55
  for col in df_html.columns:
56
  val = row[col]
57
  style = ""
58
+
59
  if isinstance(val, (int, float)):
60
  txt = f"{val:.2f}"
61
  if val > 0:
62
  style = "pos"
63
  elif val < 0:
64
  style = "neg"
65
+
66
  if col == metric:
67
  if idx in top_up:
68
  style += " top-up"
69
  elif idx in top_dn:
70
  style += " top-down"
71
+
72
  df_html.at[idx, col] = f'<span class="{style.strip()}">{txt}</span>'
73
  else:
74
  df_html.at[idx, col] = str(val)
 
98
  """
99
 
100
 
101
+ # ==============================
102
+ # NSE High-Low Master Function
103
+ # ==============================
104
  def nse_highlow(date_str: str | None = None) -> str:
105
  """
106
  Master NSE High-Low function
107
+
108
+ Responsibilities:
109
+ - Knows NSE CSV structure
110
+ - Header starts at row index 2 (skip 0 & 1)
111
+ - Uses raw CSV loader
112
  - Builds HTML
113
  - Persists ONLY HTML
114
  """
 
126
  f"ind_close_all_{d.strftime('%d%m%Y')}.csv"
127
  )
128
 
129
+ # 1️⃣ Load raw CSV text
130
+ csv_text = load_csv(url)
131
+
132
+ # 2️⃣ NSE-specific parsing (header row = 2)
133
+ df = pd.read_csv(
134
+ StringIO(csv_text),
135
+ header=2
136
  )
137
 
138
+ # 3️⃣ Build HTML
139
  html = _highlow_html_formatter(df, date_str)
140
+
141
+ # 4️⃣ Persist HTML only
142
  save(cache_key, html, "html")
143
+
144
  return html