Spaces:
Sleeping
Sleeping
| # app.py | |
| import re | |
| import gradio as gr | |
| import pandas as pd | |
| from transformers import pipeline | |
# 1) Load the synthetic SAP profitability data.
#    The path is relative, so it is resolved against the working directory.
df = pd.read_csv("synthetic_profit.csv")

# 2) Prepare TAPAS as a fallback (optional).
#    Model and tokenizer are loaded from the same checkpoint; device=-1
#    selects the CPU in the transformers pipeline API.
_TAPAS_CHECKPOINT = "google/tapas-base-finetuned-wtq"
tapas = pipeline(
    task="table-question-answering",
    model=_TAPAS_CHECKPOINT,
    tokenizer=_TAPAS_CHECKPOINT,
    device=-1,
)

# Table handed to TAPAS: every cell cast to str, one dict per row.
table = df.astype(str).to_dict(orient="records")
# 3) Mapping words -> pandas methods and columns
# Question keyword -> name of the pandas Series method that implements it.
OPERATIONS: dict[str, str] = {
    **dict.fromkeys(("total", "sum"), "sum"),
    **dict.fromkeys(("average", "mean"), "mean"),
}

# Question phrase -> DataFrame column name.
# Insertion order matters: lookups take the first phrase found in the
# question, so "profit margin" has to come before the shorter "profit".
COLUMNS: dict[str, str] = {
    "revenue": "Revenue",
    "cost": "Cost",
    "profit margin": "ProfitMargin",
    "profit": "Profit",
    "margin": "ProfitMargin",
}
| def parse_and_compute(question: str) -> str | None: | |
| q = question.lower() | |
| # 1) What operation? | |
| op = next((OPERATIONS[k] for k in OPERATIONS if k in q), None) | |
| # 2) Which column? | |
| col = next((COLUMNS[k] for k in COLUMNS if k in q), None) | |
| # 3) Which product? | |
| prod = next((p for p in df["Product"].unique() if p.lower() in q), None) | |
| # 4) Which region? (optional) | |
| region = next((r for r in df["Region"].unique() if r.lower() in q), None) | |
| # 5) Which year? | |
| m_y = re.search(r"\b(20\d{2})\b", q) | |
| year = int(m_y.group(1)) if m_y else None | |
| # 6) Which quarter? | |
| qtr = next((fq for fq in df["FiscalQuarter"].unique() if fq.lower() in q), None) | |
| # Must have at least: op, col, prod, year, qtr | |
| if None in (op, col, prod, year, qtr): | |
| return None | |
| # Build the mask | |
| mask = ( | |
| (df["Product"] == prod) & | |
| (df["FiscalYear"] == year) & | |
| (df["FiscalQuarter"] == qtr) | |
| ) | |
| if region: | |
| mask &= (df["Region"] == region) | |
| # Compute | |
| try: | |
| series = df.loc[mask, col] | |
| result = getattr(series, op)() | |
| except Exception: | |
| return None | |
| # Friendly formatting | |
| region_part = f" in {region}" if region else "" | |
| return f"{op.capitalize()} {col} for {prod}{region_part}, {qtr} {year}: {result:.2f}" | |
def answer(question: str) -> str:
    """Answer a question, preferring the pandas parser over TAPAS.

    Args:
        question: Free-text question from the UI.

    Returns:
        A prompt to enter a question when the input is empty or blank;
        otherwise the result of ``parse_and_compute`` when it produces one,
        else the TAPAS answer ("No answer found." when TAPAS returns nothing),
        or an error message if the TAPAS call raises.
    """
    # Nothing to parse and nothing useful to send to the model.
    if not question or not question.strip():
        return "Please enter a question."

    # 1) Try the generic parser + pandas
    out = parse_and_compute(question)
    if out is not None:
        return out

    # 2) Fall back to TAPAS for anything else
    try:
        res = tapas(table=table, query=question)
        # `or` also covers an empty-string answer, which a plain
        # .get(..., default) would pass through as a blank result.
        return res.get("answer") or "No answer found."
    except Exception as e:
        # UI boundary: surface the failure as text instead of raising.
        return f"❌ Pipeline error:\n{e}"
# 4) Gradio UI
# Two-line text boxes for the question and for the answer.
_question_box = gr.Textbox(
    lines=2,
    placeholder="e.g. What is the total revenue for Product A in Q1 2024?",
)
_answer_box = gr.Textbox(lines=2)

_DESCRIPTION = (
    "Generic sum/mean parsing via Pandas (region optional), "
    "falling back to TAPAS only if the question doesn't match."
)

# Single-function interface wired to `answer`.
iface = gr.Interface(
    fn=answer,
    inputs=_question_box,
    outputs=_answer_box,
    title="SAP Profitability Q&A",
    description=_DESCRIPTION,
    allow_flagging="never",
)

if __name__ == "__main__":
    # Listen on all interfaces, port 7860.
    iface.launch(server_name="0.0.0.0", server_port=7860)