File size: 2,546 Bytes
71e073a
ab11537
eeba218
71e073a
 
 
 
 
 
ab11537
71e073a
a7efbf2
 
ab11537
71e073a
ab11537
 
 
71e073a
 
 
 
ab11537
a4c1e5e
71e073a
ab11537
71e073a
 
ab11537
71e073a
ab11537
71e073a
ab11537
 
71e073a
 
 
 
ab11537
71e073a
ab11537
71e073a
 
 
 
ab11537
 
 
 
 
 
71e073a
 
ab11537
71e073a
 
ab11537
71e073a
 
 
ab11537
71e073a
 
ab11537
 
 
 
 
 
 
71e073a
ab11537
a4c1e5e
71e073a
 
944ff78
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# --------------------------------------------------------------
# simple_s3_sql.py
# ------------------------------------------------------------
import boto3
import pandas as pd
import duckdb
from io import StringIO
import gradio as gr

# === YOUR CREDENTIALS ===
ENDPOINT_URL = "https://s3.us-west-1.idrivee2.com"
ACCESS_KEY = "uCXpJ8HQizCR6i807G8A"
SECRET_KEY = "WanKkPK913Y0N1qFKhO6q2H72JzheUgz8rdz6rZx"
BUCKET = "accusagas3"

s3 = boto3.client("s3", endpoint_url=ENDPOINT_URL,
                  aws_access_key_id=ACCESS_KEY,
                  aws_secret_access_key=SECRET_KEY)


def run_sql(path: str, sql: str) -> pd.DataFrame:
    try:
        obj = s3.get_object(Bucket=BUCKET, Key=path)
        df = pd.read_csv(StringIO(obj["Body"].read().decode("utf-8")))
    except Exception as e:
        return pd.DataFrame({"error": [str(e)]})

    if df.empty:
        return pd.DataFrame({"error": ["Empty CSV"]})

    # Auto-convert Price, Volume, etc.
    for col in df.columns:
        if any(x in col.lower() for x in ["price", "volume", "amount"]):
            df[col] = pd.to_numeric(df[col].astype(str).str.replace(r"[^\d.-]", "", regex=True), errors="coerce")

    con = duckdb.connect(":memory:")
    con.register("data", df)

    if not sql.strip().lower().startswith(("select", "with")):
        con.close()
        return pd.DataFrame({"error": ["Only SELECT allowed"]})

    try:
        result = con.execute(sql).df()
    except Exception as e:
        # Try auto-casting
        if "VARCHAR" in str(e):
            col = __import__("re").search(r"column ([a-zA-Z0-9_]+)", str(e))
            if col and (c := col.group(1)) in df.columns:
                sql = sql.replace(c, f"CAST({c} AS DOUBLE)")
                result = con.execute(sql).df()
            else:
                con.close()
                return pd.DataFrame({"error": [str(e)]})
        else:
            con.close()
            return pd.DataFrame({"error": [str(e)]})
    finally:
        con.close()

    return result.head(10000)


# === GRADIO UI ===
with gr.Blocks() as app:
    gr.Markdown("## S3 CSV SQL")
    path = gr.Textbox(label="S3 Path", placeholder="vatsav_123/reports/Gold Futures Historical Data.csv")
    sql = gr.Textbox(label="SQL", lines=3, placeholder="SELECT Date, Price FROM data WHERE Price > 1000")
    btn = gr.Button("Run")
    out = gr.Dataframe()  # Only this – no height, no extras

    btn.click(run_sql, [path, sql], out)


if __name__ == "__main__":
    app.launch(server_name="0.0.0.0", server_port=7860, mcp_server=True)