""" SQL API Endpoint - Gradio-based API for remote SQL execution. Deploy alongside Streamlit or as standalone Space. """ import gradio as gr import duckdb import pandas as pd import json def execute_sql(sql_query: str, max_rows: int = 1000) -> str: """ Execute SQL query via DuckDB with HTTPFS. Returns JSON string with results or error. """ try: con = duckdb.connect(':memory:') con.execute("INSTALL httpfs; LOAD httpfs;") # Add LIMIT if not present to prevent OOM sql_lower = sql_query.lower().strip() if 'limit' not in sql_lower: sql_query = f"{sql_query.rstrip(';')} LIMIT {max_rows}" result = con.execute(sql_query).fetchdf() return json.dumps({ "status": "success", "rows": len(result), "columns": list(result.columns), "data": result.to_dict(orient='records') }, default=str) except Exception as e: return json.dumps({ "status": "error", "message": str(e) }) # Gradio Interface demo = gr.Interface( fn=execute_sql, inputs=[ gr.Textbox(label="SQL Query", lines=5, placeholder="SELECT * FROM read_parquet('URL')"), gr.Number(label="Max Rows", value=1000, precision=0) ], outputs=gr.JSON(label="Result"), title="🦆 DuckDB SQL API", description="Execute SQL queries on remote HF Parquet files. Results returned as JSON.", examples=[ ["SELECT COUNT(*) as total FROM read_parquet('https://huggingface.co/datasets/gionuibk/hyperliquid-l4-data/resolve/main/data/l4_universal_data_1765167177_1.parquet')", 10], ] ) if __name__ == "__main__": demo.launch(server_port=7860)