Spaces:

srivatsavdamaraju
/

sql_tool

Sleeping

App Files Files Community

srivatsavdamaraju committed on Nov 5, 2025

Commit

71e073a

verified ·

1 Parent(s): 783941f

Create app.py

Browse files

Files changed (1) hide show

app.py +167 -0

app.py ADDED Viewed

	@@ -0,0 +1,167 @@

+# --------------------------------------------------------------
+# gradio_s3_sql.py
+# --------------------------------------------------------------
+import os
+import re
+from io import StringIO
+
+import boto3
+import duckdb
+import gradio as gr
+import pandas as pd
+from botocore.exceptions import ClientError
+# === YOUR IDRIVE E2 CONFIG (hardcoded) ===
+ENDPOINT_URL = "https://s3.us-west-1.idrivee2.com"
+ACCESS_KEY = "rNuPBAQetemqpEeBospZ"
+SECRET_KEY = "BU4FccUYxzXVqiWjPSJM1CWEX1cNhBqbU9NeGidE"
+BUCKET_NAME = "accusagas3"
+# Initialize S3 client
+s3 = boto3.client(
+    "s3",
+    endpoint_url=ENDPOINT_URL,
+    aws_access_key_id=ACCESS_KEY,
+    aws_secret_access_key=SECRET_KEY,
+)
+def run_sql(path: str, sql: str) -> pd.DataFrame:
+    """Core function: S3 → CSV → DuckDB → SQL → DataFrame"""
+    # --- 1. Load CSV from S3 ---
+    try:
+        obj = s3.get_object(Bucket=BUCKET_NAME, Key=path)
+        csv_content = obj["Body"].read().decode("utf-8")
+        df = pd.read_csv(StringIO(csv_content))
+    except ClientError as e:
+        if e.response["Error"]["Code"] == "NoSuchKey":
+            return pd.DataFrame({"error": [f"File not found: {path}"]})
+        return pd.DataFrame({"error": [f"S3 Error: {str(e)}"]})
+    except Exception as e:
+        return pd.DataFrame({"error": [f"CSV read failed: {str(e)}"]})
+    if df.empty:
+        return pd.DataFrame({"error": ["CSV is empty"]})
+    # --- 2. Auto-convert numeric columns ---
+    numeric_keywords = ["price", "amount", "value", "cost", "revenue", "total", "volume", "open", "high", "low", "close"]
+    for col in df.columns:
+        if any(kw in col.lower() for kw in numeric_keywords):
+            # Clean: remove $, %, commas
+            cleaned = df[col].astype(str).str.replace(r"[^\d.-]", "", regex=True)
+            df[col] = pd.to_numeric(cleaned, errors="coerce")
+    # --- 3. Run SQL in DuckDB ---
+    con = duckdb.connect(":memory:")
+    con.register("data", df)
+    norm_sql = sql.strip().lower()
+    if not norm_sql.startswith(("select", "with")):
+        con.close()
+        return pd.DataFrame({"error": ["Only SELECT or WITH queries allowed"]})
+    try:
+        result = con.execute(sql).df()
+    except Exception as e:
+        # Auto-fix: CAST column to DOUBLE if type mismatch
+        if "Cannot compare values of type VARCHAR" in str(e):
+            import re
+            match = re.search(r"column ([a-zA-Z0-9_]+)", str(e), re.I)
+            col = match.group(1) if match else None
+            if col and col in df.columns:
+                fixed_sql = sql.replace(f"{col}", f"CAST({col} AS DOUBLE)")
+                try:
+                    result = con.execute(fixed_sql).df()
+                except Exception as e2:
+                    con.close()
+                    return pd.DataFrame({"error": [f"SQL failed even after cast: {e2}"]})
+            else:
+                con.close()
+                return pd.DataFrame({"error": [f"Type error: {e}"]})
+        else:
+            con.close()
+            return pd.DataFrame({"error": [f"SQL Error: {e}"]})
+    finally:
+        con.close()
+    # Limit to 10,000 rows
+    return result.head(10_000)
+# --------------------------------------------------------------
+# Gradio Interface
+# --------------------------------------------------------------
+with gr.Blocks(title="S3 SQL Query (iDrive e2)") as demo:
+    gr.Markdown(
+        """
+        # S3 CSV SQL Explorer
+        **Query any CSV in your iDrive e2 bucket using SQL**
+        Table name: `data` | Auto-casts `Price`, `Amount`, etc. to numbers
+        """
+    )
+    with gr.Row():
+        path_input = gr.Textbox(
+            label="S3 Path (Key)",
+            placeholder="vatsav_123/reports/Gold Futures Historical Data.csv",
+            lines=1,
+        )
+        sql_input = gr.Textbox(
+            label="SQL Query",
+            placeholder="SELECT Date, Price FROM data WHERE Price > 1000 ORDER BY Date DESC LIMIT 10",
+            lines=4,
+        )
+    run_btn = gr.Button("Run SQL", variant="primary")
+    output = gr.Dataframe(
+        label="Result",
+        interactive=False,
+        wrap=True,
+        height=500,
+    )
+    # Click handler
+    run_btn.click(
+        fn=run_sql,
+        inputs=[path_input, sql_input],
+        outputs=output,
+        show_progress=True,
+    )
+    # Examples
+    gr.Examples(
+        examples=[
+            [
+                "vatsav_123/reports/Gold Futures Historical Data.csv",
+                "SELECT Date, Price FROM data WHERE Price > 2000 ORDER BY Date DESC LIMIT 10"
+            ],
+            [
+                "vatsav_123/reports/Gold Futures Historical Data.csv",
+                "SELECT MIN(Price) AS min_price, MAX(Price) AS max_price FROM data"
+            ],
+            [
+                "vatsav_123/reports/Gold Futures Historical Data.csv",
+                "SELECT * FROM data WHERE Volume > 1000000 LIMIT 5"
+            ],
+        ],
+        inputs=[path_input, sql_input],
+    )
+    gr.Markdown(
+        """
+        **Tips**
+        - Use `data` as table name
+        - Columns like `Price`, `Volume`, `Amount` are auto-converted to numbers
+        - Invalid SQL → clear error message
+        """
+    )
+# --------------------------------------------------------------
+# Launch
+# --------------------------------------------------------------
+if __name__ == "__main__":
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False,  # Set True for public link
+        debug=True,
+        mcp_server=True
+    )