Update app.py
Browse files
app.py
CHANGED
|
@@ -1,22 +1,28 @@
|
|
| 1 |
import pandas as pd
|
| 2 |
import torch
|
| 3 |
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
|
| 4 |
-
# from langchain.agents import create_pandas_dataframe_agent
|
| 5 |
from langchain_experimental.agents import create_pandas_dataframe_agent
|
| 6 |
from langchain_community.llms import HuggingFacePipeline
|
| 7 |
from langchain_core.messages import SystemMessage
|
| 8 |
import gradio as gr
|
| 9 |
-
import os
|
| 10 |
|
| 11 |
# --- Configuration ---
|
| 12 |
-
# You might want to make these environment variables in a real deployment
|
| 13 |
-
# but for a basic Space, hardcoding is fine for small models.
|
| 14 |
LLM_MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.2"
|
| 15 |
DATA_FILE_PATH = "IPL.csv"
|
| 16 |
|
| 17 |
-
# ---
|
| 18 |
-
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
try:
|
| 21 |
df = pd.read_csv(DATA_FILE_PATH)
|
| 22 |
print("IPL.csv loaded successfully.")
|
|
@@ -27,19 +33,25 @@ def load_and_prepare_data():
|
|
| 27 |
df['date'] = pd.to_datetime(df['date'], errors='coerce')
|
| 28 |
df['total_runs_this_ball'] = df['runs_off_bat'] + df['extras_run']
|
| 29 |
print("DataFrame prepared.")
|
| 30 |
-
|
|
|
|
| 31 |
except FileNotFoundError:
|
|
|
|
|
|
|
| 32 |
return None
|
| 33 |
except Exception as e:
|
| 34 |
-
|
|
|
|
| 35 |
return None
|
| 36 |
|
| 37 |
-
# ---
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
|
|
|
|
|
|
| 41 |
if df is None:
|
| 42 |
-
return None,
|
| 43 |
|
| 44 |
bnb_config = BitsAndBytesConfig(
|
| 45 |
load_in_4bit=True,
|
|
@@ -76,10 +88,9 @@ def load_llm_and_agent(df):
|
|
| 76 |
llm = HuggingFacePipeline(pipeline=llm_pipeline)
|
| 77 |
print("LLM loaded and configured.")
|
| 78 |
|
| 79 |
-
|
| 80 |
-
system_message_content = f"""
|
| 81 |
You are an expert cricket analyst. You have access to a pandas DataFrame named `df` containing ball-by-ball IPL match data.
|
| 82 |
-
The DataFrame has columns
|
| 83 |
Your goal is to answer user questions about IPL cricket statistics by writing and executing pandas code.
|
| 84 |
When performing calculations, be precise. For averages, ensure you handle division by zero.
|
| 85 |
If the answer is a numerical value, just output the number. If it's a specific player or team name, output just the name.
|
|
@@ -91,73 +102,82 @@ def load_llm_and_agent(df):
|
|
| 91 |
agent = create_pandas_dataframe_agent(
|
| 92 |
llm,
|
| 93 |
df,
|
| 94 |
-
verbose=True,
|
| 95 |
agent_executor_kwargs={"handle_parsing_errors": True, "max_iterations": 10},
|
| 96 |
agent_type="openai-tools",
|
| 97 |
-
# Pass system message as part of agent creation if supported or through the prompt template
|
| 98 |
-
# Note: Depending on LangChain version and agent type, directly injecting system_message might vary.
|
| 99 |
-
# This structure is generally accepted.
|
| 100 |
-
# For agent_type="openai-tools", the system message is typically passed to the LLM directly by the agent executor.
|
| 101 |
)
|
| 102 |
print("Pandas DataFrame Agent created.")
|
| 103 |
-
|
|
|
|
| 104 |
|
| 105 |
-
# --- Gradio Interface Function ---
|
| 106 |
def predict_answer(question):
|
| 107 |
-
|
|
|
|
|
|
|
| 108 |
|
| 109 |
-
if
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
if agent_instance is None:
|
| 115 |
-
return error
|
| 116 |
|
| 117 |
try:
|
| 118 |
-
response =
|
| 119 |
return response['output']
|
| 120 |
except Exception as e:
|
|
|
|
| 121 |
return f"An error occurred while processing your request: {e}\nPlease try rephrasing your question or check the Space logs for more details."
|
| 122 |
|
| 123 |
-
# --- Initial setup
|
| 124 |
-
# This
|
| 125 |
-
|
| 126 |
-
|
|
|
|
|
|
|
|
|
|
| 127 |
|
| 128 |
# --- Gradio UI ---
|
| 129 |
-
if
|
|
|
|
| 130 |
with gr.Blocks() as demo:
|
| 131 |
-
gr.Markdown("# IPL Cricket Data Agent (Error)")
|
| 132 |
-
gr.Markdown(f"###
|
| 133 |
-
gr.Markdown("
|
|
|
|
| 134 |
else:
|
| 135 |
with gr.Blocks() as demo:
|
| 136 |
gr.Markdown("# IPL Cricket Data Agent")
|
| 137 |
gr.Markdown(
|
| 138 |
"Ask me anything about the IPL dataset! "
|
| 139 |
-
"For example: '
|
|
|
|
| 140 |
"'List the top 5 batsmen by total runs scored across all seasons.', "
|
| 141 |
-
"'
|
|
|
|
| 142 |
)
|
| 143 |
|
| 144 |
chatbot = gr.Chatbot(label="Cricket Analyst")
|
| 145 |
-
msg = gr.Textbox(label="Your Question")
|
| 146 |
clear = gr.Button("Clear")
|
| 147 |
|
| 148 |
def user_message(user_message, history):
|
|
|
|
| 149 |
history = history + [[user_message, None]]
|
| 150 |
return "", history
|
| 151 |
|
| 152 |
def bot_response(history):
|
|
|
|
| 153 |
query = history[-1][0]
|
| 154 |
response = predict_answer(query)
|
| 155 |
-
history[-1][1] = response
|
| 156 |
return history
|
| 157 |
|
|
|
|
| 158 |
msg.submit(user_message, [msg, chatbot], [msg, chatbot], queue=False).then(
|
| 159 |
bot_response, chatbot, chatbot
|
| 160 |
)
|
| 161 |
-
clear.click(lambda:
|
| 162 |
|
|
|
|
| 163 |
demo.queue().launch(debug=True)
|
|
|
|
| 1 |
import pandas as pd
|
| 2 |
import torch
|
| 3 |
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
|
|
|
|
| 4 |
from langchain_experimental.agents import create_pandas_dataframe_agent
|
| 5 |
from langchain_community.llms import HuggingFacePipeline
|
| 6 |
from langchain_core.messages import SystemMessage
|
| 7 |
import gradio as gr
|
| 8 |
+
import os
|
| 9 |
|
| 10 |
# --- Configuration ---
# Model id and data path; consider moving these to environment variables
# for a real deployment (hardcoding is fine for a small Space).
LLM_MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.2"
DATA_FILE_PATH = "IPL.csv"

# --- Global variables for manual caching ---
# Populated exactly once by the startup code at the bottom of this file;
# predict_answer() only reads them.
_df_cache = None          # cached pandas DataFrame loaded from DATA_FILE_PATH
_agent_cache = None       # cached LangChain pandas-DataFrame agent
_load_error_cache = None  # human-readable error message from initial load, if any
|
| 19 |
+
|
| 20 |
+
# --- Function to load and prepare the DataFrame (will run once) ---
|
| 21 |
+
def load_and_prepare_data_singleton():
|
| 22 |
+
global _df_cache, _load_error_cache
|
| 23 |
+
if _df_cache is not None:
|
| 24 |
+
return _df_cache # Return cached DataFrame if already loaded
|
| 25 |
+
|
| 26 |
try:
|
| 27 |
df = pd.read_csv(DATA_FILE_PATH)
|
| 28 |
print("IPL.csv loaded successfully.")
|
|
|
|
| 33 |
df['date'] = pd.to_datetime(df['date'], errors='coerce')
|
| 34 |
df['total_runs_this_ball'] = df['runs_off_bat'] + df['extras_run']
|
| 35 |
print("DataFrame prepared.")
|
| 36 |
+
_df_cache = df # Cache the loaded DataFrame for future use
|
| 37 |
+
return _df_cache
|
| 38 |
except FileNotFoundError:
|
| 39 |
+
_load_error_cache = "Error: IPL.csv not found. Make sure it's in the Space."
|
| 40 |
+
print(_load_error_cache)
|
| 41 |
return None
|
| 42 |
except Exception as e:
|
| 43 |
+
_load_error_cache = f"Error loading or preparing data: {e}"
|
| 44 |
+
print(_load_error_cache)
|
| 45 |
return None
|
| 46 |
|
| 47 |
+
# --- Function to load LLM and create Agent (will run once) ---
|
| 48 |
+
def load_llm_and_agent_singleton(df):
|
| 49 |
+
global _agent_cache, _load_error_cache
|
| 50 |
+
if _agent_cache is not None:
|
| 51 |
+
return _agent_cache, None # Return cached agent if already loaded
|
| 52 |
+
|
| 53 |
if df is None:
|
| 54 |
+
return None, _load_error_cache # Propagate error if DataFrame failed to load
|
| 55 |
|
| 56 |
bnb_config = BitsAndBytesConfig(
|
| 57 |
load_in_4bit=True,
|
|
|
|
| 88 |
llm = HuggingFacePipeline(pipeline=llm_pipeline)
|
| 89 |
print("LLM loaded and configured.")
|
| 90 |
|
| 91 |
+
system_message_content = """
|
|
|
|
| 92 |
You are an expert cricket analyst. You have access to a pandas DataFrame named `df` containing ball-by-ball IPL match data.
|
| 93 |
+
The DataFrame has columns like 'id', 'inning', 'overs', 'ballnumber', 'batsman', 'non_striker', 'bowler', 'runs_off_bat', 'extras_run', 'total_runs_this_ball', 'iswicketdelivery', 'player_out', 'kind', 'fielders_involved', 'bowlingteam', 'battingteam', 'striker', 'nonstriker', 'extra_type', 'byes_run', 'legbyes_run', 'noball_run', 'penalty_run', 'out_type', 'matchid', 'team1', 'team2', 'venue', 'date', 'winningteam', 'player_of_match', 'season'.
|
| 94 |
Your goal is to answer user questions about IPL cricket statistics by writing and executing pandas code.
|
| 95 |
When performing calculations, be precise. For averages, ensure you handle division by zero.
|
| 96 |
If the answer is a numerical value, just output the number. If it's a specific player or team name, output just the name.
|
|
|
|
| 102 |
agent = create_pandas_dataframe_agent(
|
| 103 |
llm,
|
| 104 |
df,
|
| 105 |
+
verbose=True,
|
| 106 |
agent_executor_kwargs={"handle_parsing_errors": True, "max_iterations": 10},
|
| 107 |
agent_type="openai-tools",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
)
|
| 109 |
print("Pandas DataFrame Agent created.")
|
| 110 |
+
_agent_cache = agent # Cache the agent instance for future use
|
| 111 |
+
return _agent_cache, None
|
| 112 |
|
| 113 |
+
# --- Gradio Interface Function - this is what the UI calls ---
|
| 114 |
def predict_answer(question):
|
| 115 |
+
# This ensures loading happens only once on app startup (or first request)
|
| 116 |
+
# The global variables _df_cache, _agent_cache, _load_error_cache
|
| 117 |
+
# are populated by the code running outside this function on script startup.
|
| 118 |
|
| 119 |
+
if _load_error_cache: # If there was an error during initial setup
|
| 120 |
+
return _load_error_cache
|
| 121 |
+
|
| 122 |
+
if _agent_cache is None or _df_cache is None: # Should not happen if initial setup worked
|
| 123 |
+
return "Internal error: Model or data not loaded. Please check logs."
|
|
|
|
|
|
|
| 124 |
|
| 125 |
try:
|
| 126 |
+
response = _agent_cache.invoke({"input": question})
|
| 127 |
return response['output']
|
| 128 |
except Exception as e:
|
| 129 |
+
# Log the full traceback if possible in a production setting
|
| 130 |
return f"An error occurred while processing your request: {e}\nPlease try rephrasing your question or check the Space logs for more details."
|
| 131 |
|
| 132 |
+
# --- Initial setup - These lines run ONCE when the app.py script starts ---
# This is where the heavy work happens: reading the CSV and loading the
# 4-bit-quantized LLM, then building the pandas-DataFrame agent.
print("Starting initial setup: Loading data and model...")
_df_cache = load_and_prepare_data_singleton()
# Second element is an error message when setup failed, otherwise None;
# the Gradio UI below branches on it.
_agent_cache, _load_error_cache = load_llm_and_agent_singleton(_df_cache)
print("Initial setup complete.")
|
| 138 |
+
|
| 139 |
|
| 140 |
# --- Gradio UI ---
# Two layouts: a static error page when startup failed, otherwise the chat app.
if _load_error_cache:
    with gr.Blocks() as demo:
        gr.Markdown("# IPL Cricket Data Agent (Initialization Error)")
        gr.Markdown("### An error occurred during startup:")
        gr.Markdown(f"```{_load_error_cache}```")
        gr.Markdown("Please check the Space logs for more details and ensure `IPL.csv` is correctly uploaded.")
else:
    with gr.Blocks() as demo:
        gr.Markdown("# IPL Cricket Data Agent")
        gr.Markdown(
            "Ask me anything about the IPL dataset! "
            "For example: 'How many matches are in the dataset?', "
            "'Who won the match between MI and CSK in 2023 on 2023-05-18?', "
            "'List the top 5 batsmen by total runs scored across all seasons.', "
            "'Which bowler has taken the most wickets in the 2024 season?', "
            "'What is the average number of runs scored per over in the 2023 season?'"
        )

        chatbot = gr.Chatbot(label="Cricket Analyst")
        msg = gr.Textbox(label="Your Question", placeholder="Type your question here...")
        clear = gr.Button("Clear")

        def queue_question(text, history):
            # Echo the question into the chat right away; the answer is
            # filled in by the chained callback below.
            return "", history + [[text, None]]

        def answer_last(history):
            # Compute and attach the bot reply for the newest chat pair.
            history[-1][1] = predict_answer(history[-1][0])
            return history

        # Submit appends the question without queueing (fast), then the
        # chained .then() runs the (potentially slow) agent call.
        msg.submit(queue_question, [msg, chatbot], [msg, chatbot], queue=False).then(
            answer_last, chatbot, chatbot
        )
        # Reset the chat history to an empty list.
        clear.click(lambda: [], None, chatbot, queue=False)

# Launch the Gradio app
demo.queue().launch(debug=True)
|