jay0911 committed
Commit 6c5a9f9 · verified · 1 Parent(s): 9506be9

Update app.py

Files changed (1):
  app.py +57 -119
app.py CHANGED
@@ -14,13 +14,13 @@ DATA_FILE_PATH = "IPL.csv"
 
 # --- Global variable for DataFrame only (can be cached globally as it's simple) ---
 _df_cache = None
-_load_error_cache = None # Store error during initial data loading
+_load_error_cache = None
 
 # --- Function to load and prepare the DataFrame (will run once) ---
 def load_and_prepare_data_singleton():
     global _df_cache, _load_error_cache
     if _df_cache is not None:
-        return _df_cache # Return cached DataFrame if already loaded
+        return _df_cache
     try:
         df = pd.read_csv(DATA_FILE_PATH, low_memory=False)
         print("IPL.csv loaded successfully.")
@@ -56,131 +56,69 @@ def load_and_prepare_data_singleton():
     print(_load_error_cache)
     return None
 
-# --- NEW: Function to load LLM and create Agent (per request, decorated with @spaces.GPU) ---
-# This function is now responsible for loading the LLM and creating the agent
-# within the GPU worker process for each prediction.
-@spaces.GPU  # <--- Apply @spaces.GPU here
-def get_llm_and_agent(df):
-    if df is None:
-        raise ValueError("DataFrame not loaded, cannot create agent.")
-
-    # These checks are now within the GPU-allocated context
-    if not torch.cuda.is_available():
-        raise RuntimeError("Error: CUDA (GPU) is not available. This model requires a GPU.")
-    print(f"CUDA available: {torch.cuda.is_available()}")
-    print(f"CUDA device count: {torch.cuda.device_count()}")
-    print(f"Current CUDA device: {torch.cuda.current_device()}")
-
-    bnb_config = BitsAndBytesConfig(
-        load_in_4bit=True,
-        bnb_4bit_quant_type="nf4",
-        bnb_4bit_compute_dtype=torch.float16,
-        bnb_4bit_use_double_quant=False,
-    )
-    print(f"Loading LLM: {LLM_MODEL_ID}...")
-    llm_tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL_ID, trust_remote_code=True)
-    if llm_tokenizer.pad_token is None:
-        llm_tokenizer.pad_token = llm_tokenizer.eos_token
-    llm_model = AutoModelForCausalLM.from_pretrained(
-        LLM_MODEL_ID,
-        quantization_config=bnb_config,
-        torch_dtype=torch.float16,
-        device_map="auto",
-        trust_remote_code=True,
-    )
-    llm_pipeline = pipeline(
-        "text-generation",
-        model=llm_model,
-        tokenizer=llm_tokenizer,
-        max_new_tokens=1000,
-        do_sample=True,
-        temperature=0.1,
-        top_p=0.9,
-        eos_token_id=llm_tokenizer.eos_token_id,
-        pad_token_id=llm_tokenizer.pad_token_id,
-    )
-    llm = HuggingFacePipeline(pipeline=llm_pipeline)
-    print("LLM loaded and configured.")
-
-    system_message_content = """
-    You are an expert cricket analyst. You have access to a pandas DataFrame named `df` containing ball-by-ball IPL match data.
-    The DataFrame has the following relevant columns for querying:
-    - 'match_id': Unique ID for each match.
-    - 'date': Date of the match (datetime object).
-    - 'match_type': Type of match (e.g., T20).
-    - 'event_name': Name of the event (e.g., Indian Premier League).
-    - 'innings': The innings number (1 or 2).
-    - 'batting_team': The team currently batting.
-    - 'bowling_team': The team currently bowling.
-    - 'over', 'ball', 'ball_no': Details about the specific ball.
-    - 'batter': The batsman on strike.
-    - 'bat_pos': Batting position.
-    - 'runs_batter': Runs scored by the batsman on that ball (off the bat).
-    - 'balls_faced': Balls faced by the batter up to that point in the innings.
-    - 'bowler': The bowler who bowled that ball.
-    - 'valid_ball': Whether the ball was a valid delivery.
-    - 'runs_extras': Runs scored as extras (wides, no-balls, byes, leg-byes, penalty).
-    - 'runs_total': Total runs scored on that ball (runs_batter + runs_extras).
-    - 'runs_bowler': Runs conceded by the bowler on that ball.
-    - 'extra_type': Type of extra (e.g., 'wides', 'noball').
-    - 'non_striker': The non-striker batsman.
-    - 'wicket_kind': Type of dismissal (e.g., 'bowled', 'caught').
-    - 'player_out': The player dismissed.
-    - 'fielders': Fielders involved in the dismissal.
-    - 'player_of_match': Player of the match.
-    - 'match_won_by': The team that won the match.
-    - 'win_outcome': How the match was won (e.g., 'runs', 'wickets').
-    - 'toss_winner': The team that won the toss.
-    - 'toss_decision': What the toss winner decided to do (bat or bowl).
-    - 'venue': Match venue.
-    - 'city': City where the match was played.
-    - 'year', 'season': Year and IPL season.
-    - 'gender', 'team_type', 'superover_winner', 'result_type', 'method': Other match details.
-    - 'team_runs', 'team_balls', 'team_wicket': Team's total runs, balls, wickets.
-    - 'new_batter', 'batter_runs', 'batter_balls', 'bowler_wicket': Aggregated stats.
-    - 'batting_partners', 'next_batter', 'striker_out': More granular details.
-    - 'total_runs_this_ball': (NEW COLUMN YOU ADDED) Sum of 'runs_batter' and 'runs_extras' for that specific ball.
-    Your goal is to answer user questions about IPL cricket statistics by writing and executing pandas code on the `df` DataFrame.
-    When performing calculations, be precise. For averages, ensure you handle division by zero (e.g., by checking if the denominator is zero or using `df.sum() / df.count()` for means).
-    If the answer is a numerical value, just output the number. If it's a specific player or team name, output just the name.
-    If you cannot find the answer in the DataFrame, state that you don't know or that the information is not available.
-    Avoid providing general cricket knowledge not derivable from the DataFrame.
-    Focus solely on extracting information from the `df` DataFrame. When answering questions about totals or aggregations, consider all relevant rows unless a specific filter (like season or match) is provided.
-    Always try to provide a concise answer directly from the data.
-    """
-    agent = create_pandas_dataframe_agent(
-        llm,
-        df,
-        verbose=True,
-        max_iterations=10,
-        handle_parsing_errors=True,
-        agent_executor_kwargs={"system_message": system_message_content},
-        agent_type="openai-tools",
-        allow_dangerous_code=True
-    )
-    print("Pandas DataFrame Agent created.")
-    return agent
-
 # --- Gradio Interface Function - this is what the UI calls ---
+# This function is now fully self-contained and decorated with @spaces.GPU
+@spaces.GPU
 def predict_answer(question):
-    global _df_cache, _load_error_cache  # Access the globally cached DataFrame
-
-    if _load_error_cache:
-        return _load_error_cache
+    global _df_cache
     if _df_cache is None:
         return "Internal error: DataFrame not loaded. Please check logs."
 
     try:
-        # Load LLM and create agent for THIS request within the GPU-allocated context
-        # This function call will trigger the @spaces.GPU decorator.
-        current_agent = get_llm_and_agent(_df_cache)
-        response = current_agent.invoke({"input": question})
+        # Load the LLM and create the agent inside this function call
+        print("Loading LLM and creating agent for this request...")
+        bnb_config = BitsAndBytesConfig(
+            load_in_4bit=True,
+            bnb_4bit_quant_type="nf4",
+            bnb_4bit_compute_dtype=torch.float16,
+            bnb_4bit_use_double_quant=False,
+        )
+        llm_tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL_ID, trust_remote_code=True)
+        if llm_tokenizer.pad_token is None:
+            llm_tokenizer.pad_token = llm_tokenizer.eos_token
+        llm_model = AutoModelForCausalLM.from_pretrained(
+            LLM_MODEL_ID,
+            quantization_config=bnb_config,
+            torch_dtype=torch.float16,
+            device_map="auto",
+            trust_remote_code=True,
+        )
+        llm_pipeline = pipeline(
+            "text-generation",
+            model=llm_model,
+            tokenizer=llm_tokenizer,
+            max_new_tokens=1000,
+            do_sample=True,
+            temperature=0.1,
+            top_p=0.9,
+            eos_token_id=llm_tokenizer.eos_token_id,
+            pad_token_id=llm_tokenizer.pad_token_id,
+        )
+        llm = HuggingFacePipeline(pipeline=llm_pipeline)
+
+        system_message_content = """
+        You are an expert cricket analyst. You have access to a pandas DataFrame named `df` containing ball-by-ball IPL match data.
+        ... (rest of your system message content) ...
+        """
+        agent = create_pandas_dataframe_agent(
+            llm,
+            _df_cache,  # Pass the globally cached DataFrame
+            verbose=True,
+            max_iterations=10,
+            handle_parsing_errors=True,
+            agent_executor_kwargs={"system_message": system_message_content},
+            agent_type="openai-tools",
+            allow_dangerous_code=True
+        )
+        print("Pandas DataFrame Agent created.")
+
+        response = agent.invoke({"input": question})
         return response['output']
+
     except Exception as e:
         return f"An error occurred while processing your request: {e}\nPlease try rephrasing your question or check the Space logs for more details."
 
 # --- Initial setup - These lines run ONCE when the app.py script starts ---
-# Only load the DataFrame initially. LLM and agent are loaded per request.
 print("Starting initial setup: Loading data...")
 _df_cache = load_and_prepare_data_singleton()
 print("Initial data setup complete.")
@@ -213,13 +151,13 @@ else:
 
     def bot_response(history):
         query = history[-1][0]
-        response = predict_answer(query) # This will now trigger the GPU load
+        response = predict_answer(query)
         history[-1][1] = response
         return history
 
-    msg.submit(user_message, [msg, chatbot], [msg, chatbot], queue=False).then(
+    msg.submit(user_message, [msg, chatbot], [msg, chatbot], queue=True).then( # Changed queue=False to queue=True
        bot_response, chatbot, chatbot
    )
    clear.click(lambda: [], None, chatbot, queue=False)
 
-demo.queue().launch(debug=True)
+demo.queue(max_size=20).launch(debug=True)
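
The queueing changes in this last hunk complement the GPU change: slow GPU-bound events generally need to go through Gradio's queue rather than bypass it. A minimal sketch of the two knobs touched here (component names are illustrative):

import time
import gradio as gr

def slow_reply(message):
    time.sleep(5)  # stand-in for a long GPU-bound call
    return message

with gr.Blocks() as demo:
    box = gr.Textbox()
    out = gr.Textbox()
    # queue=True routes this event through the queue so it waits its
    # turn instead of running eagerly and risking a timeout
    box.submit(slow_reply, box, out, queue=True)

# max_size caps how many requests may wait in line; beyond that, new
# arrivals are rejected immediately instead of queueing indefinitely.
demo.queue(max_size=20).launch()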
 
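
For reference, a self-contained sketch of the create_pandas_dataframe_agent call that the new predict_answer makes per request (FakeListLLM stands in for the quantized model so the sketch runs offline; the tiny DataFrame is illustrative):

import pandas as pd
from langchain_community.llms.fake import FakeListLLM
from langchain_experimental.agents import create_pandas_dataframe_agent

df = pd.DataFrame({"batter": ["V Kohli", "MS Dhoni"], "runs_batter": [54, 32]})

# Canned ReAct-style output so the default agent type can finish offline.
llm = FakeListLLM(responses=["Final Answer: V Kohli"])

agent = create_pandas_dataframe_agent(
    llm,
    df,
    verbose=True,
    handle_parsing_errors=True,
    allow_dangerous_code=True,  # opt-in: the agent executes generated Python
)
print(agent.invoke({"input": "Who scored the most runs?"})["output"])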