Spaces:
Sleeping
Sleeping
Sw1ft0
Add special header case handling to data loading section. Append schema with example rows to be passed to Gemini for context. Improve the prompt and format_result function.
7f64dba
| import pandas as pd | |
| import gradio as gr | |
| import numpy as np | |
| import os | |
| import google.generativeai as genai # requires GOOGLE_API_KEY set as env var | |
# 1. Configure Gemini
# NOTE(review): the import comment above mentions GOOGLE_API_KEY, but the
# variable actually read here is GEMINI_API_KEY — confirm which one the
# deployment sets.
_gemini_api_key = os.getenv("GEMINI_API_KEY")
genai.configure(api_key=_gemini_api_key)
model = genai.GenerativeModel("gemini-2.5-pro")
# 2. Load Excel data
_DATA_DIR = "data_source"
_REPORT1_PATH = os.path.join(_DATA_DIR, "OC Onboarding Information.xlsx")
_REPORT2_PATH = os.path.join(_DATA_DIR, "The Alex Ideas Report.xlsx")

# Report 2: every sheet, default header row.
df2 = pd.read_excel(_REPORT2_PATH, sheet_name=None)

# Report 1: every sheet; "PY Event Diary" keeps its real column names on
# the second row of the sheet, so it needs header=1.  Using ExcelFile
# parses the workbook once — the original loaded all sheets raw merely to
# enumerate sheet names, then re-read the whole file from disk once more
# for every sheet.
df1 = {}
with pd.ExcelFile(_REPORT1_PATH) as _workbook:
    for _sheet in _workbook.sheet_names:
        _header_row = 1 if _sheet == "PY Event Diary" else 0
        df1[_sheet] = _workbook.parse(sheet_name=_sheet, header=_header_row)
# Build schema info for prompts
def get_schema_info(reports1=None, reports2=None):
    """Return a text description of both reports' sheets for the LLM prompt.

    Each sheet contributes one line with its name and column list, plus —
    when the sheet has at least one row — one example row.  Parameters
    default to the module-level ``df1`` / ``df2`` dicts so the existing
    zero-argument call keeps working, but explicit dicts of DataFrames may
    be supplied (e.g. for testing).
    """
    reports1 = df1 if reports1 is None else reports1
    reports2 = df2 if reports2 is None else reports2

    def _describe(title, report):
        # One title line per report, then schema (+ optional sample) per sheet.
        chunk = [title]
        for sheet, frame in report.items():
            chunk.append(f"Sheet: {sheet}, Columns: {list(frame.columns)}")
            # Guard: the original indexed [0] unconditionally and raised
            # IndexError on any empty sheet.
            if not frame.empty:
                sample = frame.head(1).to_dict(orient="records")[0]
                chunk.append(f"Example row: {sample}")
        return chunk

    lines = _describe("Report 1 - OC Onboarding Information:", reports1)
    lines += _describe("\nReport 2 - The Alex Ideas Report:", reports2)
    return "\n".join(lines)
# Snapshot of the schema description, computed once at import time and
# interpolated into every prompt sent to Gemini.
schema_info = get_schema_info()
# Helper to format results nicely
def format_result(result):
    """Render an arbitrary ``result`` value as chat-friendly text/number.

    Numeric scalars (including numpy scalars) are rounded to 2 decimal
    places, dicts and lists are formatted recursively, pandas objects use
    their built-in string renderings (DataFrames truncated to head()),
    and anything else falls back to ``str``.
    """
    # Unwrap numpy scalars to plain Python first; the original called
    # round(result.item(), 2) directly, which TypeErrors on non-numeric
    # numpy scalars such as np.str_.
    if isinstance(result, np.generic):
        result = result.item()
    # bool is a subclass of int — without this guard True/False fall into
    # the round() branch and render as 1/0.
    if isinstance(result, bool):
        return str(result)
    if isinstance(result, (int, float)):
        return round(result, 2)
    # Convert dicts into readable "key: value" lines.
    if isinstance(result, dict):
        return "\n".join(f"{k}: {format_result(v)}" for k, v in result.items())
    # Convert lists into a comma-separated string; recurse so nested
    # numerics get rounded consistently with the dict branch.
    if isinstance(result, list):
        return ", ".join(str(format_result(v)) for v in result)
    if isinstance(result, pd.Series):
        return result.to_string()
    if isinstance(result, pd.DataFrame):
        return result.head().to_string(index=False)
    return str(result)
# 3. Core function
# Minimal builtins whitelist for the exec'd model code.  The original
# passed {"__builtins__": {}}, which removed EVERY builtin — so generated
# code using len()/round()/sorted() (the prompt explicitly asks for
# rounding) crashed with NameError.
_SAFE_BUILTINS = {
    "abs": abs, "all": all, "any": any, "bool": bool, "dict": dict,
    "enumerate": enumerate, "float": float, "int": int, "len": len,
    "list": list, "max": max, "min": min, "range": range, "round": round,
    "set": set, "sorted": sorted, "str": str, "sum": sum, "tuple": tuple,
    "zip": zip,
}


def _strip_code_fences(code):
    """Remove markdown ``` fence lines (with or without a language tag).

    The original used code.strip("```").replace("python", ""), which also
    deleted the substring "python" anywhere inside the code body itself.
    Dropping whole fence lines leaves the code untouched.
    """
    kept = [ln for ln in code.splitlines() if not ln.strip().startswith("```")]
    return "\n".join(kept).strip()


def _sanitize(code):
    """Drop import statements and shell commands from generated code.

    Matches both "import x" and "from x import y" (the original missed
    the latter), and requires a trailing space so identifiers such as
    "important_total" are not removed by mistake.
    """
    safe_lines = []
    for line in code.splitlines():
        stripped = line.strip()
        if stripped.startswith(("import ", "from ", "!")):
            continue
        safe_lines.append(line)
    return "\n".join(safe_lines)


def answer_question(history, message):
    """Answer a user question by asking Gemini for pandas code and running it.

    history: chat history (list of [user, assistant] pairs) — currently
        unused, kept for interface compatibility with the Gradio callback.
    message: latest user message (string).

    Returns the formatted result string, or an "Execution error: ..."
    string if generation or execution fails.
    """
    prompt = f"""
You are a data analysis assistant.
You can ONLY answer questions using the two Excel reports provided (df1 and df2).
Do not hallucinate or use external knowledge.
The reports are loaded as dictionaries of DataFrames:
- Access Report 1 with df1['SheetName']
- Access Report 2 with df2['SheetName']
Do not reload Excel files with pandas.
If you are unsure whether the question is relevant, try to reason using the columns available.
If absolutely no relation to provided sheets, respond with:
"I can only answer questions about the provided Excel reports."
The reports have the following schema:
{schema_info}
The user asked:
{message}
Rules:
- Use only pandas, df1, df2, and Python built-ins.
- Do NOT write import statements (pandas is already imported as pd).
- Always put the answer in a variable named `result`.
- Return ONLY Python code, nothing else.
- If multiple values are tied for the maximum, include all of them in a list.
- If result is numeric, round to 2 decimal places.
- If result is a list, return the full list (not just the first element).
- If a column is missing, return a clear error string in `result`, do not crash.
"""
    try:
        # Ask Gemini for code, then clean it up before execution.
        response = model.generate_content(prompt)
        code = _strip_code_fences(response.text.strip())
        safe_code = _sanitize(code)
        # Prepare sandbox with only the data frames and pandas in scope.
        local_vars = {"df1": df1, "df2": df2, "pd": pd}
        # SECURITY: exec of LLM-generated code is inherently risky even
        # with a restricted builtins whitelist (attribute access can reach
        # further than it looks) — run only in a trusted deployment.
        exec(safe_code, {"__builtins__": _SAFE_BUILTINS}, local_vars)
        result = local_vars.get("result", "No result produced")
        return str(format_result(result))
    except Exception as e:
        return f"Execution error: {e}"
# 5. Gradio UI — a minimal chat front-end over answer_question.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Ask me a question about the reports...")
    # ClearButton resets both the input box and the chat history.
    clear = gr.ClearButton([msg, chatbot])
    def respond(message, chat_history):
        # Delegate to the Gemini-backed analyzer, append the
        # (user, assistant) pair, and clear the input textbox.
        answer = answer_question(chat_history, message)
        chat_history.append((message, answer))
        return "", chat_history
    # Submitting the textbox sends (msg, chatbot) in and receives
    # (cleared msg, updated chatbot) back.
    msg.submit(respond, [msg, chatbot], [msg, chatbot])
# 6. Run locally (Spaces will call demo.launch() automatically)
if __name__ == "__main__":
    demo.launch()