srivatsavdamaraju committed on
Commit
71e073a
·
verified ·
1 Parent(s): 783941f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +167 -0
app.py ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # --------------------------------------------------------------
2
+ # gradio_s3_sql.py
3
+ # --------------------------------------------------------------
4
+ import boto3
5
+ import pandas as pd
6
+ import duckdb
7
+ from io import StringIO
8
+ from botocore.exceptions import ClientError
9
+ import gradio as gr
10
+
11
# === iDrive e2 configuration (read from the environment) ===
# Credentials must never live in source control: anything committed to a
# repository has to be treated as leaked. Any key pair that was previously
# hardcoded in this file should be revoked and rotated.
import os

# Non-secret settings keep their previous values as defaults.
ENDPOINT_URL = os.environ.get(
    "IDRIVE_E2_ENDPOINT_URL", "https://s3.us-west-1.idrivee2.com"
)
BUCKET_NAME = os.environ.get("IDRIVE_E2_BUCKET", "accusagas3")

# Secrets have no default. When a variable is unset or empty the value is
# None, in which case boto3 falls back to its own credential chain
# (AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY, shared credentials file, ...).
ACCESS_KEY = os.environ.get("IDRIVE_E2_ACCESS_KEY") or None
SECRET_KEY = os.environ.get("IDRIVE_E2_SECRET_KEY") or None

# Initialize the S3 client against the S3-compatible iDrive e2 endpoint.
s3 = boto3.client(
    "s3",
    endpoint_url=ENDPOINT_URL,
    aws_access_key_id=ACCESS_KEY,
    aws_secret_access_key=SECRET_KEY,
)
24
+
25
+
26
def _error_frame(message: str) -> pd.DataFrame:
    """Wrap *message* in the one-column ``error`` frame displayed by the UI."""
    return pd.DataFrame({"error": [message]})


def _coerce_numeric_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Convert text columns whose name hints at a number into numeric dtype.

    A column is a candidate when its lower-cased name contains one of the
    keywords below. Candidates are stripped of everything except digits,
    ``.`` and ``-`` (so ``$``, ``%`` and thousands separators disappear) and
    parsed with ``pd.to_numeric``; unparseable cells become NaN.
    """
    numeric_keywords = (
        "price", "amount", "value", "cost", "revenue", "total",
        "volume", "open", "high", "low", "close",
    )
    for col in df.columns:
        if not any(kw in str(col).lower() for kw in numeric_keywords):
            continue
        # Columns pandas already parsed as numbers must be left alone:
        # round-tripping them through str would mangle scientific notation
        # ("1e-05" -> "1-05" -> NaN).
        if pd.api.types.is_numeric_dtype(df[col]):
            continue
        cleaned = df[col].astype(str).str.replace(r"[^\d.-]", "", regex=True)
        df[col] = pd.to_numeric(cleaned, errors="coerce")
    return df


def _cast_column_in_sql(sql: str, error_message: str, columns) -> "str | None":
    """Return *sql* with the column named in *error_message* cast to DOUBLE.

    Returns None when no column name can be extracted from the message or
    the extracted name is not one of *columns*.
    """
    import re

    match = re.search(r"column ([a-zA-Z0-9_]+)", error_message, re.I)
    if not match:
        return None
    col = match.group(1)
    if col not in columns:
        return None
    # Word boundaries keep the rewrite from touching longer identifiers that
    # merely contain the column name (e.g. "Price" inside "min_Price").
    return re.sub(
        rf"\b{re.escape(col)}\b",
        lambda _m: f"CAST({col} AS DOUBLE)",
        sql,
    )


def run_sql(path: str, sql: str) -> pd.DataFrame:
    """Core function: S3 -> CSV -> DuckDB -> SQL -> DataFrame.

    Downloads the CSV stored under key *path* in ``BUCKET_NAME``, exposes it
    to DuckDB as the table ``data`` and runs *sql* against it.

    Args:
        path: Object key of the CSV inside the bucket.
        sql: A single ``SELECT`` or ``WITH`` query referencing ``data``.

    Returns:
        The query result truncated to 10,000 rows. Failures are never
        raised; they are reported as a DataFrame with a single ``error``
        column holding a human-readable message.
    """
    # --- 0. Validate inputs before doing any network or database work ---
    if not path or not path.strip():
        return _error_frame("S3 path (key) is required")
    query = (sql or "").strip()
    if not query.lower().startswith(("select", "with")):
        return _error_frame("Only SELECT or WITH queries allowed")

    # --- 1. Load CSV from S3 ---
    try:
        obj = s3.get_object(Bucket=BUCKET_NAME, Key=path)
        csv_content = obj["Body"].read().decode("utf-8")
        df = pd.read_csv(StringIO(csv_content))
    except ClientError as e:
        if e.response["Error"]["Code"] == "NoSuchKey":
            return _error_frame(f"File not found: {path}")
        return _error_frame(f"S3 Error: {str(e)}")
    except Exception as e:
        # UI boundary: decoding/parsing problems are shown, not raised.
        return _error_frame(f"CSV read failed: {str(e)}")

    if df.empty:
        return _error_frame("CSV is empty")

    # --- 2. Auto-convert numeric columns ---
    df = _coerce_numeric_columns(df)

    # --- 3. Run SQL in DuckDB ---
    con = duckdb.connect(":memory:")
    try:
        con.register("data", df)
        # The query text is untrusted user input. The prefix check above does
        # not stop a SELECT from calling file-reading functions, so file
        # system / network access is switched off for this connection once
        # the frame has been registered.
        con.execute("SET enable_external_access = false")
        try:
            result = con.execute(query).df()
        except Exception as e:
            message = str(e)
            if "Cannot compare values of type VARCHAR" not in message:
                return _error_frame(f"SQL Error: {e}")
            # Auto-fix: retry once with the offending column cast to DOUBLE.
            fixed_sql = _cast_column_in_sql(query, message, df.columns)
            if fixed_sql is None:
                return _error_frame(f"Type error: {e}")
            try:
                result = con.execute(fixed_sql).df()
            except Exception as e2:
                return _error_frame(f"SQL failed even after cast: {e2}")
    finally:
        # Single cleanup point for every exit path above.
        con.close()

    # Limit to 10,000 rows
    return result.head(10_000)
86
+
87
+
88
+ # --------------------------------------------------------------
89
+ # Gradio Interface
90
+ # --------------------------------------------------------------
91
# Object key used by every bundled example query.
_EXAMPLE_KEY = "vatsav_123/reports/Gold Futures Historical Data.csv"

# Build the page: intro text, the two inputs, a run button, the result grid,
# clickable examples and a short tips section.
with gr.Blocks(title="S3 SQL Query (iDrive e2)") as demo:
    gr.Markdown(
        """
        # S3 CSV SQL Explorer
        **Query any CSV in your iDrive e2 bucket using SQL**
        Table name: `data` | Auto-casts `Price`, `Amount`, etc. to numbers
        """
    )

    with gr.Row():
        path_input = gr.Textbox(
            label="S3 Path (Key)",
            placeholder="vatsav_123/reports/Gold Futures Historical Data.csv",
            lines=1,
        )
        sql_input = gr.Textbox(
            label="SQL Query",
            placeholder="SELECT Date, Price FROM data WHERE Price > 1000 ORDER BY Date DESC LIMIT 10",
            lines=4,
        )

    run_btn = gr.Button("Run SQL", variant="primary")
    # NOTE(review): `height` is the pre-5.x name of this option; confirm it is
    # still accepted by the installed Gradio version.
    output = gr.Dataframe(
        label="Result",
        interactive=False,
        wrap=True,
        height=500,
    )

    # Click handler: feed both textboxes to run_sql and show its DataFrame.
    run_btn.click(
        fn=run_sql,
        inputs=[path_input, sql_input],
        outputs=output,
        show_progress=True,
    )

    # Examples: each entry fills (path, sql) in that order.
    gr.Examples(
        examples=[
            [
                _EXAMPLE_KEY,
                "SELECT Date, Price FROM data WHERE Price > 2000 ORDER BY Date DESC LIMIT 10",
            ],
            [
                _EXAMPLE_KEY,
                "SELECT MIN(Price) AS min_price, MAX(Price) AS max_price FROM data",
            ],
            [
                _EXAMPLE_KEY,
                "SELECT * FROM data WHERE Volume > 1000000 LIMIT 5",
            ],
        ],
        inputs=[path_input, sql_input],
    )

    gr.Markdown(
        """
        **Tips**
        - Use `data` as table name
        - Columns like `Price`, `Volume`, `Amount` are auto-converted to numbers
        - Invalid SQL → clear error message
        """
    )
155
+
156
+
157
+ # --------------------------------------------------------------
158
+ # Launch
159
+ # --------------------------------------------------------------
160
if __name__ == "__main__":
    # Launch settings gathered in one mapping so they can be read at a glance.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,  # Set True for public link
        "debug": True,
        "mcp_server": True,
    }
    demo.launch(**launch_options)