Spaces:

rithwikreal
/

AnalysisApp

Sleeping

App Files Files Community

rithwikreal committed on Sep 28, 2025

Commit

7595864

verified ·

1 Parent(s): 536cf00

Create app.py

Browse files

Files changed (1) hide show

app.py +171 -0

app.py ADDED Viewed

	@@ -0,0 +1,171 @@

+# app.py
+import gradio as gr
+import pandas as pd
+import io
+import re
+import gc
+from typing import Tuple, Optional
+# ---------- Helper functions ----------
def load_file_bytes_to_df(file) -> Tuple[Optional[pd.DataFrame], Optional[str]]:
    """
    Read an uploaded file into a DataFrame, parsing from an in-memory buffer.

    ``file`` may be any of:
      * ``None``                      -> reported as "No file uploaded."
      * a filesystem path (str / os.PathLike)
      * raw ``bytes`` / ``bytearray``
      * a file-like object exposing ``read()`` and, optionally, ``name``

    Format detection: the file extension wins when it is ``.csv`` or a known
    Excel suffix. Otherwise the leading magic bytes decide: a ZIP or OLE2
    signature means Excel, anything else is treated as CSV. (Looking for a
    comma in the first bytes is not reliable: compressed Excel data can
    contain that byte, and a single-column CSV contains none.)

    Returns (df, error_message); exactly one of the two is None.
    """
    import os  # local import: `os` is not among the module-level imports

    if file is None:
        return None, "No file uploaded."

    excel_suffixes = (".xlsx", ".xlsm", ".xls")
    # ZIP local-file header (xlsx/xlsm) and OLE2 compound-file header (xls).
    excel_magic = (b"PK\x03\x04", b"\xd0\xcf\x11\xe0")

    content = b""
    try:
        if isinstance(file, (bytes, bytearray)):
            name = ""
            content = bytes(file)
        elif isinstance(file, (str, os.PathLike)):
            name = os.fsdecode(file)
            with open(name, "rb") as handle:
                content = handle.read()
        else:
            name = str(getattr(file, "name", "") or "")
            content = file.read()
            if isinstance(content, str):
                # Handle opened in text mode; pandas gets bytes either way.
                content = content.encode("utf-8")
            if not content and name and os.path.isfile(name):
                # The handle may already be positioned at EOF; fall back to
                # re-reading the file it names.
                with open(name, "rb") as handle:
                    content = handle.read()

        if not content:
            return None, "Uploaded file is empty."

        lowered = name.lower()
        if lowered.endswith(".csv"):
            is_excel = False
        elif lowered.endswith(excel_suffixes):
            is_excel = True
        else:
            is_excel = content.startswith(excel_magic)

        reader = pd.read_excel if is_excel else pd.read_csv
        df = reader(io.BytesIO(content))
    except Exception as e:
        # Boundary with untrusted uploads: report any failure as a message.
        return None, f"Error reading file: {e}"
    finally:
        # Drop the raw bytes promptly to keep their lifetime short.
        del content
        gc.collect()
    return df, None
def simple_nl_to_action(df: pd.DataFrame, query: str):
    """
    Translate a small set of English-like commands into DataFrame operations.

    The query is matched case-insensitively. Rules are tried in this order:

      1. ``filter where <col> <op> <value> [and <col> <op> <value> ...]``
         with ``<op>`` one of ``=  >  <  >=  <=``. Numeric columns compare
         against ``float(value)``; other columns support ``=`` only, as a
         case-insensitive string match. At most 200 rows are returned.
      2. any query containing ``columns``         -> the column names
      3. ``first`` / ``head`` (optionally ``N``)  -> first N rows (default 5)
      4. ``describe`` / ``summary`` / ``statistics`` -> ``df.describe``
      5. ``show [column] <name>``                 -> that column, 100 rows max
      6. anything else                            -> first 10 rows plus a hint

    Filters are checked first so that column names containing words such as
    "first", "head" or "columns" are not mistaken for another command.

    Returns a ``(result, message)`` tuple: ``result`` is a DataFrame or None,
    ``message`` is an explanatory string or None.
    """
    q = (query or "").strip().lower()
    if q == "":
        return None, "Please type a question like: 'show columns', 'show first 5 rows', 'describe column sales', or 'filter where Region = India and Year >= 2021'."

    m = re.search(r"filter where (.+)$", q)
    if m:
        parsed = [_CONDITION_RE.fullmatch(part.strip()) for part in _AND_SPLIT_RE.split(m.group(1))]
        # Apply the filter only when every clause parses; otherwise fall
        # through to the remaining rules and the generic hint.
        if all(parsed):
            return _apply_filters(df, parsed)

    if "columns" in q:
        return pd.DataFrame({"columns": df.columns}), None

    m = re.search(r"(first|head)\s*(\d+)?", q)
    if m:
        n = int(m.group(2)) if m.group(2) else 5
        return df.head(n), None

    if "describe" in q or "summary" in q or "statistics" in q:
        return df.describe(include='all').reset_index(), None

    m = re.search(r"show (column )?([a-z0-9_ ]+)", q)
    if m:
        col = m.group(2).strip()
        matches = _matching_columns(df, col)
        if matches:
            return df[[matches[0]]].head(100), None
        return None, f"Column '{col}' not found. Try 'show columns' to see exact names."

    return df.head(10), "Couldn't parse exact request — showing first 10 rows. Try: 'show columns', 'show first 5 rows', 'describe', or 'filter where Column = Value'."


# Comparison operator -> name of the equivalent element-wise Series method.
_FILTER_METHODS = {"=": "eq", ">": "gt", "<": "lt", ">=": "ge", "<=": "le"}

# One "<col> <op> <value>" clause. Two-character operators are listed first:
# regex alternation takes the first alternative that matches, so with ">"
# ahead of ">=" the "=" would end up inside the value.
_CONDITION_RE = re.compile(r"([a-z0-9_ ]+?)\s*(>=|<=|=|>|<)\s*'?(?P<val>[^']+?)'?")

# Split on " and " only when another "<col> <op>" follows, so a value such as
# "trinidad and tobago" stays in one piece.
_AND_SPLIT_RE = re.compile(r"\s+and\s+(?=[a-z0-9_ ]+?\s*(?:>=|<=|=|>|<))")


def _matching_columns(df, name):
    """Return the column labels of df equal to name, ignoring case."""
    target = name.lower()
    # str() keeps non-string labels (e.g. integers) from raising on .lower().
    return [c for c in df.columns if str(c).lower() == target]


def _apply_filters(df, conditions):
    """Apply parsed filter clauses in sequence; return (rows, None) or (None, error)."""
    result = df
    for cond in conditions:
        col_text = cond.group(1).strip()
        op = cond.group(2)
        val = cond.group("val").strip()
        matches = _matching_columns(result, col_text)
        if not matches:
            return None, f"Column '{col_text}' not found. Use 'show columns' to check names."
        colname = matches[0]
        try:
            series = result[colname]
            if pd.api.types.is_numeric_dtype(series):
                mask = getattr(series, _FILTER_METHODS[op])(float(val))
            elif op == "=":
                mask = series.astype(str).str.lower() == val.lower()
            else:
                return None, f"Operator {op} not supported for non-numeric column '{colname}'."
            result = result[mask]
        except Exception as e:
            return None, f"Error applying filter: {e}"
    return result.head(200), None
+# ---------- Processing wrapper ----------
def process(file, query):
    """
    Gradio callback: load the upload, run the query, return (table, status).

    The first element is a DataFrame or None; the second is always a
    non-empty status string (an error message, the parser's hint, or a
    default of "OK" / "No result").
    """
    frame, load_error = load_file_bytes_to_df(file)
    if load_error:
        # Nothing usable was loaded: drop our reference and report the error.
        del file
        gc.collect()
        return None, load_error

    result_table = None
    try:
        answer, note = simple_nl_to_action(frame, query)
        if isinstance(answer, pd.DataFrame):
            # Copy the result before the reference to the full frame is dropped.
            result_table = answer.copy()
    except Exception as exc:
        result_table = None
        note = f"Error while processing: {exc}"

    # Release the large objects before handing the result back to the UI.
    del frame, file
    gc.collect()

    if result_table is None:
        return None, (note or "No result")
    return result_table, (note or "OK")
+# ---------- Clear / reset function ----------
def clear_all():
    """
    Reset the inputs and outputs to their empty state.

    Returns four updates in the order of the Clear button's ``outputs``
    list: file input, query textbox, result table, status textbox.

    The component-agnostic ``gr.update`` helper is used on purpose: the
    per-class ``Component.update`` classmethods (``gr.File.update``,
    ``gr.Textbox.update``, ``gr.Dataframe.update``) were removed in Gradio 4
    and raise AttributeError there.
    """
    return (
        gr.update(value=None),  # file input
        gr.update(value=""),    # query textbox
        gr.update(value=None),  # result table
        gr.update(value=""),    # status textbox
    )
+# ---------- Gradio UI ----------
# Layout: one row of inputs, one row of buttons, then the result table and a
# read-only status line.
with gr.Blocks() as demo:
    gr.Markdown("# Chat-with-CSV — private ephemeral uploads (cleared on Reset)")
    with gr.Row():
        file_input = gr.File(label="Upload CSV or XLSX (will not be saved)", file_count="single")
        query_input = gr.Textbox(label="Ask a question (example: 'show columns' or 'filter where Country = India')", placeholder="Type your question here")
    with gr.Row():
        submit = gr.Button("Run query")
        clear_btn = gr.Button("Clear / Reset (remove uploaded file & results)")
    # No `headers` argument: it expects a list of column names, not a string
    # such as "auto". The returned DataFrame carries its own column names.
    output_table = gr.Dataframe(label="Result table")
    status = gr.Textbox(label="Status / Messages", interactive=False)
    # Run the query against the uploaded file; fills the table and the status line.
    submit.click(fn=process, inputs=[file_input, query_input], outputs=[output_table, status])
    # Clear button resets all four components to their empty values.
    clear_btn.click(fn=clear_all, inputs=None, outputs=[file_input, query_input, output_table, status])
if __name__ == "__main__":
    # Do not enable "share" or persistent caching here; default launch is fine for Spaces
    demo.launch()