srivatsavdamaraju committed on
Commit
ab11537
·
verified ·
1 Parent(s): a4c1e5e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -120
app.py CHANGED
@@ -1,164 +1,76 @@
1
  # --------------------------------------------------------------
2
- # gradio_s3_sql.py (Gradio-compatible, no 'height' error)
3
  # --------------------------------------------------------------
4
  import boto3
5
  import pandas as pd
6
  import duckdb
7
  from io import StringIO
8
- from botocore.exceptions import ClientError
9
  import gradio as gr
10
 
import os  # local to this config section: needed only for the overrides below

# === iDrive e2 configuration ===
# SECURITY: the fallback literals below are credentials committed in plain
# text. Rotate the key pair and provide the replacements through the
# environment variables named here; the literals are kept only so existing
# deployments keep working until that has been done.
ENDPOINT_URL = os.environ.get("IDRIVE_E2_ENDPOINT_URL", "https://s3.us-west-1.idrivee2.com")
ACCESS_KEY = os.environ.get("IDRIVE_E2_ACCESS_KEY", "rNuPBAQetemqpEeBospZ")
SECRET_KEY = os.environ.get("IDRIVE_E2_SECRET_KEY", "BU4FccUYxzXVqiWjPSJM1CWEX1cNhBqbU9NeGidE")
BUCKET_NAME = os.environ.get("IDRIVE_E2_BUCKET", "accusagas3")

# One shared S3-compatible client, created at import time and used by run_sql.
s3 = boto3.client(
    "s3",
    endpoint_url=ENDPOINT_URL,
    aws_access_key_id=ACCESS_KEY,
    aws_secret_access_key=SECRET_KEY,
)
23
 
24
 
def run_sql(path: str, sql: str) -> pd.DataFrame:
    """Run a read-only SQL query against a CSV object stored in the bucket.

    The object at ``path`` is downloaded, parsed with pandas, registered on an
    in-memory DuckDB connection as the table ``data`` and queried with ``sql``.
    Load and query failures are reported as a one-row DataFrame with a single
    ``error`` column rather than raised, so the UI can always render a table.

    Args:
        path: Object key inside ``BUCKET_NAME``.
        sql: Query text; it must start with ``SELECT`` or ``WITH``.

    Returns:
        The first 10,000 result rows at most, or a one-row ``error`` frame.
    """
    import re  # local import: `re` is not part of the module's import block

    def error_frame(message: str) -> pd.DataFrame:
        """Wrap *message* in the one-column frame the UI shows for failures."""
        return pd.DataFrame({"error": [message]})

    # Reject disallowed statements before doing any network or parsing work.
    # NOTE(review): this prefix check is not a sandbox; a SELECT can still call
    # DuckDB's file-reading table functions. Consider disabling external access
    # on the connection -- confirm against the DuckDB configuration docs.
    if not (sql or "").strip().lower().startswith(("select", "with")):
        return error_frame("Only SELECT or WITH queries allowed")

    # --- Load CSV ---
    try:
        obj = s3.get_object(Bucket=BUCKET_NAME, Key=path)
        df = pd.read_csv(StringIO(obj["Body"].read().decode("utf-8")))
    except ClientError as e:
        if e.response["Error"]["Code"] == "NoSuchKey":
            return error_frame(f"File not found: {path}")
        return error_frame(f"S3 Error: {e}")
    except Exception as e:
        return error_frame(f"CSV read failed: {e}")

    if df.empty:
        return error_frame("CSV is empty")

    # --- Auto-convert numeric columns ---
    # Columns whose name contains one of these keywords are stripped of
    # everything except digits, '.' and '-' and coerced to numbers (NaN on
    # failure).
    numeric_keywords = (
        "price", "amount", "value", "cost", "revenue", "total",
        "volume", "open", "high", "low", "close",
    )
    for col in df.columns:
        if any(kw in str(col).lower() for kw in numeric_keywords):
            cleaned = df[col].astype(str).str.replace(r"[^\d.-]", "", regex=True)
            df[col] = pd.to_numeric(cleaned, errors="coerce")

    # --- DuckDB ---
    con = duckdb.connect(":memory:")
    try:
        con.register("data", df)
        try:
            result = con.execute(sql).df()
        except Exception as e:
            # Auto-fix VARCHAR vs number comparisons by casting the column
            # named in the error message and retrying once.
            if "Cannot compare values of type VARCHAR" not in str(e):
                return error_frame(f"SQL Error: {e}")
            match = re.search(r"column ([a-zA-Z0-9_]+)", str(e), re.I)
            col = match.group(1) if match else None
            if not col or col not in df.columns:
                return error_frame(f"Type error: {e}")
            # Whole-identifier match only: a plain str.replace would also
            # rewrite the name inside longer identifiers (e.g. Price_total).
            fixed_sql = re.sub(
                rf"\b{re.escape(col)}\b",
                lambda _m: f"CAST({col} AS DOUBLE)",
                sql,
            )
            try:
                result = con.execute(fixed_sql).df()
            except Exception as e2:
                return error_frame(f"SQL failed even after CAST: {e2}")
    finally:
        # Single close point for every exit path above.
        con.close()

    return result.head(10_000)
82
 
83
 
84
- # --------------------------------------------------------------
85
- # Gradio UI (no 'height' on Dataframe)
86
- # --------------------------------------------------------------
# Height cap for the result table, applied through the component's elem_id.
# Plain CSS avoids both the Dataframe `height` argument and a JS timeout hook
# that depends on guessed internal class names.
_RESULT_GRID_CSS = """
#result-grid {
    max-height: 500px;
    overflow-y: auto;
}
"""

with gr.Blocks(title="S3 SQL Query (iDrive e2)", css=_RESULT_GRID_CSS) as demo:
    gr.Markdown(
        """
        # S3 CSV SQL Explorer
        Query any CSV in your iDrive e2 bucket using SQL
        Table name: `data` | Auto-casts `Price`, `Amount`, etc.
        """
    )

    with gr.Row():
        path_input = gr.Textbox(
            label="S3 Path (Key)",
            placeholder="vatsav_123/reports/Gold Futures Historical Data.csv",
            lines=1,
        )
        sql_input = gr.Textbox(
            label="SQL Query",
            placeholder="SELECT Date, Price FROM data WHERE Price > 1000 ORDER BY Date DESC LIMIT 10",
            lines=4,
        )

    run_btn = gr.Button("Run SQL", variant="primary")

    # gr.Dataframe is Gradio's tabular output component; it is read-only here
    # and styled by the #result-grid rule above.
    output = gr.Dataframe(
        label="Result",
        interactive=False,
        wrap=True,
        elem_id="result-grid",
    )

    # Both text boxes feed run_sql; its DataFrame (result or error) fills the table.
    run_btn.click(
        fn=run_sql,
        inputs=[path_input, sql_input],
        outputs=output,
        show_progress=True,
    )

    gr.Examples(
        examples=[
            [
                "vatsav_123/reports/Gold Futures Historical Data.csv",
                "SELECT Date, Price FROM data WHERE Price > 2000 ORDER BY Date DESC LIMIT 10",
            ],
            [
                "vatsav_123/reports/Gold Futures Historical Data.csv",
                "SELECT MIN(Price) AS min_price, MAX(Price) AS max_price FROM data",
            ],
        ],
        inputs=[path_input, sql_input],
    )
153
-
154
-
155
- # --------------------------------------------------------------
156
- # Launch
157
- # --------------------------------------------------------------
if __name__ == "__main__":
    # Serve on every interface at the fixed port, without a public share
    # link, and with Gradio's debug mode switched on.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "debug": True,
    }
    demo.launch(**launch_options)
 
1
  # --------------------------------------------------------------
2
+ # simple_s3_sql.py
3
  # --------------------------------------------------------------
4
  import boto3
5
  import pandas as pd
6
  import duckdb
7
  from io import StringIO
 
8
  import gradio as gr
9
 
import os  # local to this config section: needed only for the overrides below

# === Storage credentials ===
# SECURITY: the fallback literals below are credentials committed in plain
# text. Rotate the key pair and provide the replacements through the
# environment variables named here; the literals are kept only so existing
# deployments keep working until that has been done.
ENDPOINT_URL = os.environ.get("IDRIVE_E2_ENDPOINT_URL", "https://s3.us-west-1.idrivee2.com")
ACCESS_KEY = os.environ.get("IDRIVE_E2_ACCESS_KEY", "rNuPBAQetemqpEeBospZ")
SECRET_KEY = os.environ.get("IDRIVE_E2_SECRET_KEY", "BU4FccUYxzXVqiWjPSJM1CWEX1cNhBqbU9NeGidE")
BUCKET = os.environ.get("IDRIVE_E2_BUCKET", "accusagas3")

# One shared S3-compatible client, created at import time and used by run_sql.
s3 = boto3.client(
    "s3",
    endpoint_url=ENDPOINT_URL,
    aws_access_key_id=ACCESS_KEY,
    aws_secret_access_key=SECRET_KEY,
)
 
 
 
19
 
20
 
def run_sql(path: str, sql: str) -> pd.DataFrame:
    """Run a read-only SQL query against a CSV object stored in the bucket.

    The object at ``path`` is downloaded, parsed with pandas, registered on an
    in-memory DuckDB connection as the table ``data`` and queried with ``sql``.
    Load and query failures are reported as a one-row DataFrame with a single
    ``error`` column rather than raised, so the UI can always render a table.

    Args:
        path: Object key inside ``BUCKET``.
        sql: Query text; it must start with ``SELECT`` or ``WITH``.

    Returns:
        The first 10,000 result rows at most, or a one-row ``error`` frame.
    """
    import re  # local import: `re` is not part of the module's import block

    def error_frame(message) -> pd.DataFrame:
        """Wrap *message* in the one-column frame the UI shows for failures."""
        return pd.DataFrame({"error": [str(message)]})

    # Reject disallowed statements before doing any network or parsing work.
    # NOTE(review): this prefix check is not a sandbox; a SELECT can still call
    # DuckDB's file-reading table functions. Consider disabling external access
    # on the connection -- confirm against the DuckDB configuration docs.
    if not (sql or "").strip().lower().startswith(("select", "with")):
        return error_frame("Only SELECT allowed")

    try:
        obj = s3.get_object(Bucket=BUCKET, Key=path)
        df = pd.read_csv(StringIO(obj["Body"].read().decode("utf-8")))
    except Exception as e:
        return error_frame(e)

    if df.empty:
        return error_frame("Empty CSV")

    # Auto-convert Price, Volume, etc.: strip everything except digits, '.'
    # and '-' from matching columns, then coerce to numbers (NaN on failure).
    for col in df.columns:
        if any(kw in str(col).lower() for kw in ("price", "volume", "amount")):
            cleaned = df[col].astype(str).str.replace(r"[^\d.-]", "", regex=True)
            df[col] = pd.to_numeric(cleaned, errors="coerce")

    con = duckdb.connect(":memory:")
    try:
        con.register("data", df)
        try:
            result = con.execute(sql).df()
        except Exception as e:
            # Try auto-casting the column named in a VARCHAR type error.
            if "VARCHAR" not in str(e):
                return error_frame(e)
            match = re.search(r"column ([a-zA-Z0-9_]+)", str(e))
            name = match.group(1) if match else None
            if name is None or name not in df.columns:
                return error_frame(e)
            # Whole-identifier match only: a plain str.replace would also
            # rewrite the name inside longer identifiers (e.g. Price_total).
            cast_sql = re.sub(
                rf"\b{re.escape(name)}\b",
                lambda _m: f"CAST({name} AS DOUBLE)",
                sql,
            )
            try:
                result = con.execute(cast_sql).df()
            except Exception as retry_error:
                # The retry used to run unguarded inside the handler, so a
                # second failure escaped as an exception instead of a frame.
                return error_frame(retry_error)
    finally:
        # Single close point for every exit path above.
        con.close()

    return result.head(10000)
62
 
63
 
64
+ # === GRADIO UI ===
with gr.Blocks() as app:
    gr.Markdown("## S3 CSV SQL")

    # Inputs: the object key inside the bucket and the query to run on it.
    path = gr.Textbox(
        label="S3 Path",
        placeholder="vatsav_123/reports/Gold Futures Historical Data.csv",
    )
    sql = gr.Textbox(
        label="SQL",
        lines=3,
        placeholder="SELECT Date, Price FROM data WHERE Price > 1000",
    )
    btn = gr.Button("Run")

    # Plain result table: default Dataframe, no height or other options.
    out = gr.Dataframe()

    # Clicking the button passes both text boxes to run_sql and shows the
    # DataFrame it returns.
    btn.click(fn=run_sql, inputs=[path, sql], outputs=out)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
 
 
 
 
 
 
 
74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
if __name__ == "__main__":
    # Listen on every interface at the fixed port.
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )