Spaces:

PD03
/

talk_to_data

Sleeping

App Files Community

PD03 committed on Jun 25, 2025

Commit

aa97025

verified ·

1 Parent(s): 60fddfe

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -41

app.py CHANGED Viewed

@@ -1,55 +1,45 @@
-import pandas as pd
 import gradio as gr
-from transformers import pipeline
-from langchain_community.llms import HuggingFacePipeline
-from langchain_experimental.agents import create_pandas_dataframe_agent
-from langchain.agents.agent_types import AgentType
-# Load data
-df = pd.read_csv("synthetic_profit.csv")
-# Lightweight Hugging Face pipeline (Flan-T5-base)
-hf_pipeline = pipeline(
-    task="text2text-generation",
-    model="google/flan-t5-base",
-    device=-1  # CPU
-)
-# LangChain LLM
-llm = HuggingFacePipeline(pipeline=hf_pipeline)
-# Create LangChain agent with explicit parsing-error handling
-agent = create_pandas_dataframe_agent(
-    llm,
-    df,
-    verbose=True,
-    agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
-    handle_parsing_errors=True,
-    allow_dangerous_code=True
 )
-# Answer query function with clearer prompts
-def answer(query: str) -> str:
     try:
-        prompt = (
-            f"Answer this clearly and numerically without scientific notation: {query}. "
-            "If multiple numbers, provide their total sum clearly."
-        )
-        response = agent.run(prompt)
-        return f"📊 {response}"
     except Exception as e:
-        return f"❗ Error: {str(e)}"
 # Gradio interface
-demo = gr.Interface(
-    fn=answer,
-    inputs=gr.Textbox(
-        lines=2,
-        placeholder="E.g., 'Total revenue for Product B in EMEA during Q2 2024'"
-    ),
     outputs="text",
-    title="🟢 SAP Profitability Data Chat (Flan-T5 + Pandas)",
-    description="Ask clearly numeric questions about synthetic SAP profitability data. Results are precise and human-readable."
 )
-demo.launch()

+# app.py
 import gradio as gr
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
+import pandas as pd
+# Load your synthetic profitability dataset
+df = pd.read_csv('synthetic_profit.csv')
+# Initialize the TAPEX small model fine-tuned on WikiSQL
+MODEL_ID = "microsoft/tapex-small-finetuned-wikisql"
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+model     = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID)
+# Build a table-QA pipeline
+table_qa = pipeline(
+    "table-question-answering",
+    model=model,
+    tokenizer=tokenizer,
+    framework="pt",
+    device=-1   # set to 0 if you enable GPU in your Space
 )
+def answer_profitability(question):
+    table = df.to_dict(orient="records")
     try:
+        out = table_qa(table=table, query=question)
+        return out.get("answer", "No answer found.")
     except Exception as e:
+        return f"Error: {e}"
 # Gradio interface
+iface = gr.Interface(
+    fn=answer_profitability,
+    inputs=gr.Textbox(lines=2, placeholder="Ask a question about profitability…"),
     outputs="text",
+    title="SAP Profitability Q&A (TAPEX-Small)",
+    description="""
+    Ask free-form questions on the synthetic profitability dataset.
+    Powered end-to-end by microsoft/tapex-small-finetuned-wikisql.
+    """
 )
+if __name__ == "__main__":
+    iface.launch(server_name="0.0.0.0", server_port=7860)