52100322-TruongBinhThuan commited on
Commit
89ab967
·
2 Parent(s): d36d08512dfc47

Merge branch 'main' of https://huggingface.co/spaces/sunbv56/V-LegalQA-Chatbot

Browse files
Files changed (1) hide show
  1. app.py +184 -1013
app.py CHANGED
@@ -1,23 +1,17 @@
1
- import uuid
2
- import time
3
- import json
4
- import gradio as gr
5
  import torch
6
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
7
- import modelscope_studio.components.antd as antd
8
- import modelscope_studio.components.antdx as antdx
9
- import modelscope_studio.components.base as ms
10
- import modelscope_studio.components.pro as pro
11
- # Removed: import dashscope
12
- from config import DEFAULT_LOCALE, DEFAULT_SETTINGS, DEFAULT_THEME, DEFAULT_SUGGESTIONS, save_history, get_text, user_config, bot_config, welcome_config #, api_key # Removed api_key
13
- # Removed: from dashscope import Generation
14
 
15
- # --- Model Loading ---
16
- print("Setting up device...")
17
- device = "cuda" if torch.cuda.is_available() else "cpu"
18
- print(f"Using device: {device}")
19
 
20
- loaded_models = {}
 
 
 
 
 
21
 
22
  # Sử dụng try-except để xử lý lỗi nếu không tải được mô hình
23
  try:
@@ -25,7 +19,6 @@ try:
25
  print(f"Loading model: {model_name_1}...")
26
  model_1 = AutoModelForSeq2SeqLM.from_pretrained(model_name_1).to(device)
27
  tokenizer_1 = AutoTokenizer.from_pretrained(model_name_1)
28
- loaded_models[model_name_1] = {"model": model_1, "tokenizer": tokenizer_1}
29
  print(f"Model {model_name_1} loaded successfully.")
30
  except Exception as e:
31
  print(f"Error loading model {model_name_1}: {e}")
@@ -35,1022 +28,200 @@ try:
35
  print(f"Loading model: {model_name_2}...")
36
  model_2 = AutoModelForSeq2SeqLM.from_pretrained(model_name_2).to(device)
37
  tokenizer_2 = AutoTokenizer.from_pretrained(model_name_2)
38
- loaded_models[model_name_2] = {"model": model_2, "tokenizer": tokenizer_2}
39
  print(f"Model {model_name_2} loaded successfully.")
40
  except Exception as e:
41
  print(f"Error loading model {model_name_2}: {e}")
42
 
43
  # Bỏ qua việc tải model_3 (ViLawT5_RL)
 
44
 
45
  try:
46
  model_name_4 = "sunbv56/V-LegalQA"
47
  print(f"Loading model: {model_name_4}...")
48
  model_4 = AutoModelForSeq2SeqLM.from_pretrained(model_name_4).to(device)
49
  tokenizer_4 = AutoTokenizer.from_pretrained(model_name_4)
50
- loaded_models[model_name_4] = {"model": model_4, "tokenizer": tokenizer_4}
51
  print(f"Model {model_name_4} loaded successfully.")
52
  except Exception as e:
53
  print(f"Error loading model {model_name_4}: {e}")
54
 
55
- if not loaded_models:
56
- print("\n" + "="*50)
57
- print("FATAL ERROR: No models could be loaded. The application cannot run.")
58
- print("Please check model names, network connection, and available disk space.")
59
- print("="*50 + "\n")
60
- # Optionally raise an error or exit here if running as a script
61
- # raise RuntimeError("No models loaded successfully!")
62
- # exit() # Or sys.exit(1) after importing sys
63
-
64
- # --- Update Model Options based on loaded models ---
65
- # Original MODEL_OPTIONS_MAP structure from config.py (assuming it looks like this)
66
- # Replace this with your actual definition from config.py if different
67
- MODEL_OPTIONS_MAP = {
68
- "label": get_text("Model", "模型"),
69
- "name": "model",
70
- "choices": [
71
- # Populate this dynamically
72
- ],
73
- "info": get_text("Select the model you want to use", "请选择需要使用的模型"),
74
- }
75
-
76
- # Populate choices dynamically
77
- AVAILABLE_MODEL_OPTIONS = []
78
- for name in loaded_models.keys():
79
- # Use the name itself as the label, or define more descriptive labels
80
- label = name.split('/')[-1] # Get 'ViLawT5_QAChatBot' etc. as label
81
- AVAILABLE_MODEL_OPTIONS.append({"label": label, "value": name})
82
-
83
- MODEL_OPTIONS_MAP["choices"] = AVAILABLE_MODEL_OPTIONS
84
-
85
- # Update DEFAULT_SETTINGS to use the first available model
86
- if AVAILABLE_MODEL_OPTIONS:
87
- DEFAULT_SETTINGS['model'] = AVAILABLE_MODEL_OPTIONS[0]['value']
88
- else:
89
- # Handle the case where no models are loaded - set a default or handle error
90
- DEFAULT_SETTINGS['model'] = None
91
- print("Warning: No models loaded, model selection will be empty.")
92
-
93
- # --- Gradio UI and Events ---
94
-
95
- # Removed: dashscope.api_key = api_key
96
-
97
- # Removed: format_history function (not needed for simple seq2seq input)
98
-
99
- class Gradio_Events:
100
-
101
- @staticmethod
102
- def submit(state_value):
103
- start_time = time.time()
104
- history = state_value["conversation_contexts"][
105
- state_value["conversation_id"]]["history"]
106
- settings = state_value["conversation_contexts"][
107
- state_value["conversation_id"]]["settings"]
108
- # enable_thinking = state_value["conversation_contexts"][
109
- # state_value["conversation_id"]]["enable_thinking"] # Keep if needed for UI, but generation logic changes
110
-
111
- model_name = settings.get("model")
112
-
113
- # Ensure a model is selected and loaded
114
- if not model_name or model_name not in loaded_models:
115
- error_msg = f"Error: Model '{model_name}' is not available or not selected."
116
- print(error_msg)
117
- history.append({
118
- "role": "assistant",
119
- "content": [{"type": "text", "content": f'<span style="color: var(--color-red-500)">{error_msg}</span>'}],
120
- "key": str(uuid.uuid4()),
121
- "header": "Error",
122
- "loading": False,
123
- "status": "error"
124
- })
125
- yield {
126
- chatbot: gr.update(value=history),
127
- state: gr.update(value=state_value),
128
- }
129
- return # Stop processing this submission
130
-
131
- # Get the actual model and tokenizer objects
132
- selected_model_info = loaded_models[model_name]
133
- model = selected_model_info["model"]
134
- tokenizer = selected_model_info["tokenizer"]
135
- model_label = next((item['label'] for item in AVAILABLE_MODEL_OPTIONS if item['value'] == model_name), model_name)
136
-
137
-
138
- # --- Prepare Input for Seq2Seq Model ---
139
- # Use the last user message as input. Adjust if your models need specific formatting.
140
- if len(history) < 1 or history[-1]["role"] != "user":
141
- # This case should ideally not happen if submit is called after add_message
142
- user_input = "Hello" # Default or fetch differently
143
- print("Warning: Could not find the last user message, using default.")
144
- else:
145
- user_input = history[-1]["content"]
146
-
147
- # Simple prompt format (adjust if needed for your specific models)
148
- # Example: Some models might expect "question: <query>" or similar
149
- prompt = f"question: {user_input}" # Adjust this format as needed!
150
- print(f"Using model: {model_name}")
151
- print(f"Input prompt: {prompt}")
152
-
153
- # Add placeholder for assistant response
154
- history.append({
155
- "role":
156
- "assistant",
157
- "content": [],
158
- "key":
159
- str(uuid.uuid4()),
160
- "header": model_label, # Use the label from options
161
- "loading":
162
- True,
163
- "status":
164
- "pending"
165
- })
166
-
167
- yield {
168
- chatbot: gr.update(value=history),
169
- state: gr.update(value=state_value),
170
- }
171
-
172
- try:
173
- # --- Tokenize and Generate ---
174
- inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512).to(device) # Adjust max_length
175
-
176
- # Generation parameters (tune these for your models)
177
- generation_kwargs = {
178
- "max_length": 512, # Adjust max output length
179
- "num_beams": 5, # Beam search
180
- "early_stopping": True,
181
- # Add other parameters like temperature, top_k, top_p if desired
182
- # "temperature": 0.7,
183
- # "top_k": 50,
184
- }
185
- print(f"Generating with kwargs: {generation_kwargs}")
186
-
187
- with torch.no_grad(): # Important for inference
188
- outputs = model.generate(**inputs, **generation_kwargs)
189
-
190
- # --- Decode Response ---
191
- response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
192
- print(f"Raw response: {response_text}")
193
-
194
- # --- Update History ---
195
- history[-1]["content"] = [{"type": "text", "content": response_text}]
196
- history[-1]["loading"] = False
197
- history[-1]["status"] = "done"
198
- cost_time = "{:.2f}".format(time.time() - start_time)
199
- history[-1]["footer"] = get_text(f"{cost_time}s", f"用时{cost_time}s")
200
-
201
- yield {
202
- chatbot: gr.update(value=history),
203
- state: gr.update(value=state_value),
204
- }
205
-
206
- except Exception as e:
207
- print(f"Error during generation with model {model_name}: {e}")
208
- history[-1]["loading"] = False
209
- history[-1]["status"] = "error" # Use 'error' status
210
- history[-1]["content"] = [{
211
- "type":
212
- "text",
213
- "content":
214
- f'<span style="color: var(--color-red-500)">Error during generation: {str(e)}</span>'
215
- }]
216
- yield {
217
- chatbot: gr.update(value=history),
218
- state: gr.update(value=state_value)
219
- }
220
- # Re-raise if you want the error to propagate further, or handle it here
221
- # raise e
222
-
223
- @staticmethod
224
- def add_message(input_value, settings_form_value, thinking_btn_state_value, # Keep thinking_btn_state if UI uses it
225
- state_value):
226
- if not input_value or input_value.strip() == "":
227
- print("Empty input, skipping.")
228
- # Optionally return an update to clear the input without submitting
229
- # return { input: gr.update(value="") }
230
- return gr.skip() # Skip the entire process if input is empty
231
-
232
-
233
- if not state_value["conversation_id"]:
234
- random_id = str(uuid.uuid4())
235
- history = []
236
- state_value["conversation_id"] = random_id
237
- # Ensure default settings (including the default model) are applied
238
- current_settings = settings_form_value if settings_form_value else DEFAULT_SETTINGS.copy()
239
- if not current_settings.get('model') and AVAILABLE_MODEL_OPTIONS:
240
- current_settings['model'] = AVAILABLE_MODEL_OPTIONS[0]['value']
241
-
242
- state_value["conversation_contexts"][
243
- state_value["conversation_id"]] = {
244
- "history": history,
245
- "settings": current_settings, # Use current or default settings
246
- "enable_thinking": thinking_btn_state_value["enable_thinking"] # Keep if needed
247
- }
248
- state_value["conversations"].append({
249
- "label": input_value[:50] + ('...' if len(input_value) > 50 else ''), # Truncate label
250
- "key": random_id
251
- })
252
- else:
253
- # Update settings for existing conversation before adding message
254
- state_value["conversation_contexts"][
255
- state_value["conversation_id"]]["settings"] = settings_form_value
256
- state_value["conversation_contexts"][
257
- state_value["conversation_id"]]["enable_thinking"] = thinking_btn_state_value["enable_thinking"]
258
-
259
-
260
- history = state_value["conversation_contexts"][
261
- state_value["conversation_id"]]["history"]
262
-
263
- # Add user message
264
- history.append({
265
- "role": "user",
266
- "content": input_value,
267
- "key": str(uuid.uuid4())
268
- })
269
-
270
- # Update state *before* calling preprocess/submit
271
- # No, preprocess needs the user message *already* in history
272
- # state_value["conversation_contexts"][
273
- # state_value["conversation_id"]]["history"] = history
274
-
275
- yield Gradio_Events.preprocess_submit(clear_input=True)(state_value)
276
-
277
- # Make sure the model is loaded before trying to submit
278
- selected_model = state_value["conversation_contexts"][state_value["conversation_id"]]["settings"].get('model')
279
- if not selected_model or selected_model not in loaded_models:
280
- # Handle case where no model is selected or available *before* calling submit
281
- error_msg = f"Error: Model '{selected_model}' not available or not selected. Cannot generate response."
282
- print(error_msg)
283
- history.append({
284
- "role": "assistant",
285
- "content": [{"type": "text", "content": f'<span style="color: var(--color-red-500)">{error_msg}</span>'}],
286
- "key": str(uuid.uuid4()),
287
- "header": "Error",
288
- "loading": False,
289
- "status": "error"
290
- })
291
- # Need to yield the error message *and* the postprocess state
292
- post_process_update = Gradio_Events.postprocess_submit(state_value)
293
- post_process_update[chatbot] = gr.update(value=history) # Add chatbot update
294
- yield post_process_update
295
-
296
- else:
297
- # Proceed with generation if model is available
298
- try:
299
- # Use a generator pattern even though submit itself doesn't stream *chunks* anymore
300
- # It still yields intermediate states (loading) and the final state
301
- for update in Gradio_Events.submit(state_value):
302
- yield update
303
- except Exception as e:
304
- # This exception might be caught inside submit already,
305
- # but catch here just in case submit itself raises before yielding
306
- print(f"Error during submission process: {e}")
307
- # Manually create an error state if submit failed early
308
- history = state_value["conversation_contexts"][state_value["conversation_id"]]["history"]
309
- if not history or history[-1].get("role") != "assistant":
310
- # Add error message if submit failed before adding assistant placeholder
311
- history.append({
312
- "role": "assistant",
313
- "content": [{"type": "text", "content": f'<span style="color: var(--color-red-500)">Error: {e}</span>'}],
314
- "key": str(uuid.uuid4()), "header": "Error", "loading": False, "status": "error"
315
- })
316
- else: # Add error to the loading message if it exists
317
- history[-1]["loading"] = False
318
- history[-1]["status"] = "error"
319
- history[-1]["content"] = [{"type": "text", "content": f'<span style="color: var(--color-red-500)">Error: {e}</span>'}]
320
- yield Gradio_Events.postprocess_submit(state_value) # Ensure UI is unlocked
321
- # raise e # Optionally re-raise
322
- finally:
323
- # Ensure UI is always returned to a non-loading state
324
- yield Gradio_Events.postprocess_submit(state_value)
325
-
326
- @staticmethod
327
- def preprocess_submit(clear_input=True):
328
-
329
- def preprocess_submit_handler(state_value):
330
- # Check if conversation_id is valid before accessing context
331
- if not state_value["conversation_id"] or state_value["conversation_id"] not in state_value["conversation_contexts"]:
332
- print("Warning: Invalid conversation ID in preprocess_submit.")
333
- # Handle gracefully, maybe skip update or return default state
334
- return gr.skip()
335
-
336
- history = state_value["conversation_contexts"][
337
- state_value["conversation_id"]]["history"]
338
- return {
339
- **({
340
- input:
341
- gr.update(value="", interactive=False) # Clear and disable input
342
- } if clear_input else {input: gr.update(interactive=False)}), # Just disable
343
- conversations:
344
- gr.update(active_key=state_value["conversation_id"],
345
- items=list(
346
- map(
347
- lambda item: {
348
- **item,
349
- # Disable *all* other conversations during generation
350
- "disabled": True # item["key"] != state_value["conversation_id"]
351
- }, state_value["conversations"]))),
352
- add_conversation_btn:
353
- gr.update(disabled=True),
354
- clear_btn:
355
- gr.update(disabled=True),
356
- conversation_delete_menu_item:
357
- gr.update(disabled=True),
358
- # Ensure settings cannot be changed during generation
359
- setting_btn: gr.update(disabled=True),
360
- # Disable chatbot actions during generation
361
- chatbot:
362
- gr.update(value=history,
363
- bot_config=bot_config(
364
- disabled_actions=['edit', 'retry', 'delete']),
365
- user_config=user_config(
366
- disabled_actions=['edit', 'delete'])),
367
- state:
368
- gr.update(value=state_value), # Pass state through
369
- }
370
-
371
- return preprocess_submit_handler
372
-
373
- @staticmethod
374
- def postprocess_submit(state_value):
375
- # Check if conversation_id is valid before accessing context
376
- if not state_value["conversation_id"] or state_value["conversation_id"] not in state_value["conversation_contexts"]:
377
- print("Warning: Invalid conversation ID in postprocess_submit.")
378
- # Return a state that enables controls but maybe shows no chat
379
- return {
380
- input: gr.update(interactive=True),
381
- conversation_delete_menu_item: gr.update(disabled=True), # No active convo
382
- clear_btn: gr.update(disabled=True), # No active convo
383
- conversations: gr.update(items=state_value.get("conversations", [])), # Show list
384
- add_conversation_btn: gr.update(disabled=False),
385
- setting_btn: gr.update(disabled=False), # Re-enable settings button
386
- chatbot: gr.update(value=None, bot_config=bot_config(), user_config=user_config()), # Clear chat
387
- state: gr.update(value=state_value),
388
- }
389
-
390
- history = state_value["conversation_contexts"][
391
- state_value["conversation_id"]]["history"]
392
- return {
393
- input:
394
- gr.update(interactive=True), # Re-enable input
395
- conversation_delete_menu_item:
396
- gr.update(disabled=False),
397
- clear_btn:
398
- gr.update(disabled=False),
399
- conversations: # Re-enable all conversations in the list
400
- gr.update(items=list(map(lambda item: {**item, "disabled": False}, state_value["conversations"]))),
401
- add_conversation_btn:
402
- gr.update(disabled=False),
403
- setting_btn: gr.update(disabled=False), # Re-enable settings button
404
- chatbot:
405
- gr.update(value=history,
406
- bot_config=bot_config(),
407
- user_config=user_config()), # Re-enable chatbot actions
408
- state:
409
- gr.update(value=state_value), # Pass state through
410
- }
411
-
412
- @staticmethod
413
- def cancel(state_value):
414
- # Since generation is not streamed chunk-by-chunk, cancel primarily means
415
- # unlocking the UI if it got stuck somehow.
416
- # The actual model generation might continue in the background if started.
417
- # For true cancellation, you'd need more complex process management.
418
- print("Cancel requested. Unlocking UI.")
419
- # Find the last message, mark it as cancelled if it was loading
420
- if state_value["conversation_id"] and state_value["conversation_id"] in state_value["conversation_contexts"]:
421
- history = state_value["conversation_contexts"][state_value["conversation_id"]]["history"]
422
- if history and history[-1].get("loading"):
423
- history[-1]["loading"] = False
424
- history[-1]["status"] = "cancelled" # Or 'error' or 'done'
425
- history[-1]["footer"] = get_text("Generation cancelled by user", "用户取消生成")
426
- # Optionally clear the content or leave it empty
427
- # history[-1]["content"] = [{"type": "text", "content": "[Cancelled]"}]
428
- # Return the postprocess state to unlock UI elements
429
- return Gradio_Events.postprocess_submit(state_value)
430
-
431
-
432
- @staticmethod
433
- def delete_message(state_value, e: gr.EventData):
434
- index = e._data["payload"][0]["index"]
435
- if not state_value["conversation_id"] or state_value["conversation_id"] not in state_value["conversation_contexts"]:
436
- return gr.skip() # No active conversation
437
-
438
- history = state_value["conversation_contexts"][
439
- state_value["conversation_id"]]["history"]
440
- # Make sure index is valid
441
- if 0 <= index < len(history):
442
- history.pop(index) # Use pop for efficiency
443
- state_value["conversation_contexts"][
444
- state_value["conversation_id"]]["history"] = history
445
- else:
446
- print(f"Warning: Invalid index {index} for deleting message.")
447
- return gr.skip()
448
-
449
- # Return only the state update, chatbot will refresh based on state
450
- return gr.update(value=state_value)
451
-
452
-
453
- @staticmethod
454
- def edit_message(state_value, chatbot_value, e: gr.EventData):
455
- index = e._data["payload"][0]["index"]
456
- if not state_value["conversation_id"] or state_value["conversation_id"] not in state_value["conversation_contexts"]:
457
- return gr.skip() # No active conversation
458
-
459
- history = state_value["conversation_contexts"][
460
- state_value["conversation_id"]]["history"]
461
-
462
- # Check index validity and if chatbot_value structure matches
463
- if 0 <= index < len(history) and index < len(chatbot_value) and "content" in chatbot_value[index]:
464
- # Update content based on the structure from the chatbot component
465
- # It might be just text or a list of dicts like {"type": "text", "content": ...}
466
- new_content = chatbot_value[index]["content"]
467
- # Ensure history stores it in the expected format (likely just the text for user messages)
468
- if history[index]["role"] == "user":
469
- history[index]["content"] = new_content # Assuming user content is stored as a simple string
470
- else:
471
- # If assistant content is stored differently (e.g., list of dicts), adapt here
472
- history[index]["content"] = new_content
473
- state_value["conversation_contexts"][
474
- state_value["conversation_id"]]["history"] = history
475
- else:
476
- print(f"Warning: Invalid index {index} or mismatch in chatbot_value structure for editing.")
477
- return gr.skip()
478
-
479
- return gr.update(value=state_value) # Return updated state
480
-
481
- @staticmethod
482
- def regenerate_message(settings_form_value, thinking_btn_state_value,
483
- state_value, e: gr.EventData):
484
- index = e._data["payload"][0]["index"]
485
- if not state_value["conversation_id"] or state_value["conversation_id"] not in state_value["conversation_contexts"]:
486
- return gr.skip()
487
-
488
- history = state_value["conversation_contexts"][
489
- state_value["conversation_id"]]["history"]
490
-
491
- # Find the user message preceding the assistant message at 'index'
492
- # Usually, the message to regenerate is assistant, so the input is at index-1
493
- if index > 0 and history[index]["role"] == "assistant" and history[index-1]["role"] == "user":
494
- # Trim history up to *before* the assistant message we want to regenerate
495
- history = history[:index]
496
- else:
497
- print("Warning: Cannot regenerate. Expected user message before the selected assistant message.")
498
- # Fallback: Maybe just remove the selected message and the one before it?
499
- # Or just remove the selected one and try submitting the last user message again?
500
- # Safest: just skip regeneration if structure isn't as expected.
501
- return gr.skip()
502
-
503
- # Update state with trimmed history and current settings
504
- state_value["conversation_contexts"][
505
- state_value["conversation_id"]] = {
506
- "history": history,
507
- "settings": settings_form_value,
508
- "enable_thinking": thinking_btn_state_value["enable_thinking"]
509
- }
510
-
511
- # Preprocess UI (lock controls, show loading state potentially)
512
- # Preprocess needs the user message back in history to display correctly
513
- # Let's yield preprocess first, then submit
514
- yield Gradio_Events.preprocess_submit(clear_input=False)(state_value) # Don't clear input field
515
 
516
- # Make sure the model is loaded before trying to submit
517
- selected_model = state_value["conversation_contexts"][state_value["conversation_id"]]["settings"].get('model')
518
- if not selected_model or selected_model not in loaded_models:
519
- # Handle case where no model is selected or available *before* calling submit
520
- error_msg = f"Error: Model '{selected_model}' not available or not selected. Cannot regenerate response."
521
- print(error_msg)
522
- history.append({
523
- "role": "assistant",
524
- "content": [{"type": "text", "content": f'<span style="color: var(--color-red-500)">{error_msg}</span>'}],
525
- "key": str(uuid.uuid4()),
526
- "header": "Error",
527
- "loading": False,
528
- "status": "error"
529
- })
530
- post_process_update = Gradio_Events.postprocess_submit(state_value)
531
- post_process_update[chatbot] = gr.update(value=history) # Add chatbot update
532
- yield post_process_update
533
- else:
534
- # Call submit to generate the new response
535
- try:
536
- for chunk in Gradio_Events.submit(state_value):
537
- yield chunk
538
- except Exception as e:
539
- print(f"Error during regeneration submission: {e}")
540
- # Handle error display similar to add_message
541
- history = state_value["conversation_contexts"][state_value["conversation_id"]]["history"]
542
- if not history or history[-1].get("role") != "assistant":
543
- history.append({
544
- "role": "assistant",
545
- "content": [{"type": "text", "content": f'<span style="color: var(--color-red-500)">Error: {e}</span>'}],
546
- "key": str(uuid.uuid4()), "header": "Error", "loading": False, "status": "error"
547
- })
548
- else:
549
- history[-1]["loading"] = False; history[-1]["status"] = "error"
550
- history[-1]["content"] = [{"type": "text", "content": f'<span style="color: var(--color-red-500)">Error: {e}</span>'}]
551
- yield Gradio_Events.postprocess_submit(state_value)
552
- # raise e
553
- finally:
554
- # Postprocess UI (unlock controls)
555
- yield Gradio_Events.postprocess_submit(state_value)
556
-
557
- @staticmethod
558
- def select_suggestion(input_value, e: gr.EventData):
559
- # This assumes the suggestion replaces the '/' trigger
560
- # Adjust if the behavior should be different (e.g., append)
561
- # The original JS logic suggests '/' triggers the suggestion list
562
- # Selecting might append or replace based on context, let's assume replacement for simplicity
563
- selected_suggestion = e._data["payload"][0]
564
- # Simple replacement logic:
565
- # Find the last '/' and replace everything after it, or append if no '/'
566
- last_slash = input_value.rfind('/')
567
- if last_slash != -1:
568
- new_value = input_value[:last_slash] + selected_suggestion
569
- else:
570
- new_value = input_value + selected_suggestion # Or just selected_suggestion?
571
-
572
- # Original logic was: input_value = input_value[:-1] + e._data["payload"][0]
573
- # This assumes the trigger was the *last* character. Let's stick to that.
574
- if input_value.endswith('/'):
575
- new_value = input_value[:-1] + selected_suggestion
576
- else:
577
- new_value = selected_suggestion # Or append? Let's try replacing if no trailing /
578
-
579
- return gr.update(value=new_value)
580
-
581
- @staticmethod
582
- def apply_prompt(e: gr.EventData):
583
- # Gets value from welcome message prompt selection
584
- return gr.update(value=e._data["payload"][0]["value"]["description"])
585
-
586
- @staticmethod
587
- def new_chat(thinking_btn_state, state_value):
588
- if not state_value.get("conversation_id"): # Check if key exists
589
- # If already on a new chat (no ID), do nothing
590
- return gr.skip()
591
-
592
- # Reset conversation ID and potentially thinking state
593
- state_value["conversation_id"] = ""
594
- thinking_btn_state["enable_thinking"] = True # Reset thinking state if used
595
-
596
- # Prepare default settings for the new chat
597
- new_chat_settings = DEFAULT_SETTINGS.copy()
598
- if AVAILABLE_MODEL_OPTIONS and not new_chat_settings.get('model'):
599
- new_chat_settings['model'] = AVAILABLE_MODEL_OPTIONS[0]['value']
600
-
601
-
602
- # Update UI: clear chatbot, select no active conversation, reset settings form
603
- return gr.update(active_key=None), \
604
- gr.update(value=None), \
605
- gr.update(value=new_chat_settings), \
606
- gr.update(value=thinking_btn_state), \
607
- gr.update(value=state_value)
608
-
609
- @staticmethod
610
- def select_conversation(thinking_btn_state_value, state_value,
611
- e: gr.EventData):
612
- active_key = e._data["payload"][0]
613
- current_id = state_value.get("conversation_id")
614
-
615
- if current_id == active_key or not active_key or (
616
- active_key not in state_value.get("conversation_contexts", {})):
617
- print(f"Skipping conversation selection: current={current_id}, target={active_key}")
618
- return gr.skip() # No change or invalid key
619
-
620
- print(f"Switching conversation from '{current_id}' to '{active_key}'")
621
- state_value["conversation_id"] = active_key
622
- context = state_value["conversation_contexts"][active_key]
623
-
624
- # Restore thinking state and settings from the selected conversation
625
- thinking_btn_state_value["enable_thinking"] = context.get("enable_thinking", True) # Default to True if missing
626
- restored_settings = context.get("settings", DEFAULT_SETTINGS.copy())
627
-
628
- # Ensure the model in settings is still valid/loaded
629
- if restored_settings.get('model') not in loaded_models:
630
- print(f"Warning: Model '{restored_settings.get('model')}' in selected conversation is no longer loaded. Resetting to default.")
631
- restored_settings['model'] = DEFAULT_SETTINGS.get('model') # Use current default
632
-
633
- # Update UI components
634
- return gr.update(active_key=active_key), \
635
- gr.update(value=context.get("history", [])), \
636
- gr.update(value=restored_settings), \
637
- gr.update(value=thinking_btn_state_value), \
638
- gr.update(value=state_value) # Update the main state
639
-
640
-
641
- @staticmethod
642
- def click_conversation_menu(state_value, e: gr.EventData):
643
- payload = e._data["payload"]
644
- if not payload or len(payload) < 2:
645
- print("Warning: Invalid payload for conversation menu click.")
646
- return gr.skip()
647
-
648
- conversation_id = payload[0].get("key")
649
- operation = payload[1].get("key")
650
-
651
- if not conversation_id or not operation:
652
- print("Warning: Missing key or operation in conversation menu click.")
653
- return gr.skip()
654
-
655
- if operation == "delete":
656
- print(f"Deleting conversation: {conversation_id}")
657
- if conversation_id in state_value.get("conversation_contexts", {}):
658
- del state_value["conversation_contexts"][conversation_id]
659
-
660
- state_value["conversations"] = [
661
- item for item in state_value.get("conversations", [])
662
- if item.get("key") != conversation_id
663
- ]
664
-
665
- # If the deleted conversation was the active one, clear the chat view
666
- if state_value.get("conversation_id") == conversation_id:
667
- state_value["conversation_id"] = ""
668
- # Prepare default settings for the now empty view
669
- new_chat_settings = DEFAULT_SETTINGS.copy()
670
- if AVAILABLE_MODEL_OPTIONS and not new_chat_settings.get('model'):
671
- new_chat_settings['model'] = AVAILABLE_MODEL_OPTIONS[0]['value']
672
-
673
- return gr.update(
674
- items=state_value["conversations"],
675
- active_key=None # No active key
676
- ), gr.update(value=None), gr.update(value=new_chat_settings), gr.update(value=state_value) # Added settings update
677
- else:
678
- # Just update the list of conversations, keep the current view
679
- return gr.update(
680
- items=state_value["conversations"]
681
- ), gr.skip(), gr.skip(), gr.update(value=state_value) # Skip chatbot/settings update
682
- # Add other operations like 'rename' here if needed
683
- # elif operation == "rename":
684
- # ... implementation ...
685
-
686
- return gr.skip() # Default skip if operation not handled
687
-
688
- @staticmethod
689
- def toggle_settings_header(settings_header_state_value):
690
- settings_header_state_value[
691
- "open"] = not settings_header_state_value.get("open", False) # Default to False if key missing
692
- return gr.update(value=settings_header_state_value)
693
-
694
- @staticmethod
695
- def clear_conversation_history(state_value):
696
- conversation_id = state_value.get("conversation_id")
697
- if not conversation_id or conversation_id not in state_value.get("conversation_contexts", {}):
698
- print("Skipping clear history: No active or valid conversation.")
699
- return gr.skip() # No active conversation
700
-
701
- print(f"Clearing history for conversation: {conversation_id}")
702
- state_value["conversation_contexts"][conversation_id]["history"] = []
703
-
704
- # Update chatbot display and the state
705
- return gr.update(value=None), gr.update(value=state_value)
706
-
707
- @staticmethod
708
- def update_browser_state(state_value):
709
- # Only save the necessary parts to browser state
710
- return gr.update(value=dict(
711
- conversations=state_value.get("conversations", []),
712
- conversation_contexts=state_value.get("conversation_contexts", {})
713
- # Do not save the active conversation_id itself, it's transient UI state
714
- ))
715
-
716
- @staticmethod
717
- def apply_browser_state(browser_state_value, state_value):
718
- if not browser_state_value: # Handle initial load where state might be null/empty
719
- print("No browser state found to apply.")
720
- # Initialize state if empty
721
- if not state_value.get("conversations"):
722
- state_value["conversations"] = []
723
- if not state_value.get("conversation_contexts"):
724
- state_value["conversation_contexts"] = {}
725
- state_value["conversation_id"] = "" # Ensure no active conversation on fresh load
726
- # Prepare default settings for the initial view
727
- initial_settings = DEFAULT_SETTINGS.copy()
728
- if AVAILABLE_MODEL_OPTIONS and not initial_settings.get('model'):
729
- initial_settings['model'] = AVAILABLE_MODEL_OPTIONS[0]['value']
730
-
731
- return gr.update(items=[]), gr.update(value=None), gr.update(value=initial_settings), gr.update(value=state_value)
732
-
733
-
734
- print("Applying browser state...")
735
- # Basic validation: check if keys exist and have expected types (list/dict)
736
- loaded_conversations = browser_state_value.get("conversations")
737
- loaded_contexts = browser_state_value.get("conversation_contexts")
738
-
739
- if isinstance(loaded_conversations, list) and isinstance(loaded_contexts, dict):
740
- state_value["conversations"] = loaded_conversations
741
- state_value["conversation_contexts"] = loaded_contexts
742
- state_value["conversation_id"] = "" # Reset active conversation on load
743
-
744
- # Prepare default settings for the initial view after loading state
745
- initial_settings = DEFAULT_SETTINGS.copy()
746
- if AVAILABLE_MODEL_OPTIONS and not initial_settings.get('model'):
747
- initial_settings['model'] = AVAILABLE_MODEL_OPTIONS[0]['value']
748
-
749
-
750
- # Update UI based on loaded state
751
- return gr.update(items=loaded_conversations, active_key=None), \
752
- gr.update(value=None), \
753
- gr.update(value=initial_settings), \
754
- gr.update(value=state_value)
755
- else:
756
- print("Warning: Invalid browser state format. Ignoring.")
757
- # Initialize state as if no browser state was found
758
- state_value["conversations"] = []
759
- state_value["conversation_contexts"] = {}
760
- state_value["conversation_id"] = ""
761
- initial_settings = DEFAULT_SETTINGS.copy()
762
- if AVAILABLE_MODEL_OPTIONS and not initial_settings.get('model'):
763
- initial_settings['model'] = AVAILABLE_MODEL_OPTIONS[0]['value']
764
-
765
- return gr.update(items=[]), gr.update(value=None), gr.update(value=initial_settings), gr.update(value=state_value)
766
-
767
-
768
- # --- UI Definition ---
769
- css = """
770
- /* ... (keep existing CSS) ... */
771
- .gradio-container {
772
- padding: 0 !important;
773
- }
774
- .gradio-container > main.fillable {
775
- padding: 0 !important;
776
- }
777
- #chatbot {
778
- height: calc(100vh - 21px - 16px); /* Adjust if header/footer height changes */
779
- max-height: 1500px;
780
- }
781
- #chatbot .chatbot-conversations {
782
- height: 100vh; /* Full height */
783
- background-color: var(--ms-gr-ant-color-bg-layout);
784
- padding-left: 4px;
785
- padding-right: 4px;
786
- display: flex; /* Use flexbox for vertical layout */
787
- flex-direction: column; /* Stack children vertically */
788
- }
789
- #chatbot .chatbot-conversations .chatbot-conversations-list {
790
- padding-left: 0;
791
- padding-right: 0;
792
- flex-grow: 1; /* Allow list to take remaining space */
793
- overflow-y: auto; /* Add scroll if list is long */
794
- }
795
- #chatbot .chatbot-chat {
796
- padding: 32px;
797
- padding-bottom: 0;
798
- height: 100%;
799
- display: flex; /* Use flexbox */
800
- flex-direction: column; /* Stack chat messages and input vertically */
801
- }
802
- @media (max-width: 768px) {
803
- #chatbot .chatbot-chat {
804
- padding: 16px; /* Add some padding on mobile */
805
- padding-bottom: 0;
806
- }
807
- #chatbot .chatbot-conversations {
808
- /* Consider hiding conversation list or making it a drawer on mobile */
809
- }
810
- }
811
- #chatbot .chatbot-chat .chatbot-chat-messages {
812
- flex: 1; /* Allow chat messages to take available space */
813
- overflow-y: auto; /* Add scroll to messages */
814
- }
815
- #chatbot .setting-form-thinking-budget {
816
- /* Keep or remove based on whether thinking budget is still relevant */
817
- /* display: none; /* Example: Hide if not used */
818
- }
819
- /* Style for disabled input */
820
- #input-sender textarea:disabled {
821
- background-color: var(--ms-gr-ant-color-bg-container-disabled);
822
- cursor: not-allowed;
823
- }
824
- """
825
-
826
- # Removed model_options_map_json and the JS function, as options are handled in Python now
827
-
828
- with gr.Blocks(css=css, fill_width=True) as demo: # Removed js=js
829
- # Initial state structure
830
- state = gr.State({
831
- "conversation_contexts": {},
832
- "conversations": [],
833
- "conversation_id": "",
834
- })
835
-
836
- with ms.Application(), antdx.XProvider(
837
- theme=DEFAULT_THEME, locale=DEFAULT_LOCALE), ms.AutoLoading():
838
- with antd.Row(gutter=[0, 0], wrap=False, elem_id="chatbot"): # Use gutter 0 for closer columns
839
- # Left Column
840
- with antd.Col(md=dict(flex="0 0 260px", span=0), # Hide on smaller screens (md breakpoint)
841
- xs=dict(span=0), # Explicitly hide on extra small
842
- sm=dict(span=24, order=1, flex="0 0 260px"), # Show on small screens, potentially adjust layout/order
843
- # Consider using a collapsible drawer for mobile instead
844
- elem_classes="chatbot-conversations-col" # Add class for potential styling
845
- ):
846
- with ms.Div(elem_classes="chatbot-conversations"): # This div now uses flex column from CSS
847
- with antd.Flex(vertical=True,
848
- gap="small",
849
- # Removed elem_style=dict(height="100%") - parent div controls height
850
- ):
851
- # Logo
852
- Logo()
853
-
854
- # New Conversation Button
855
- with antd.Button(value=None,
856
- color="primary",
857
- variant="filled",
858
- block=True) as add_conversation_btn:
859
- ms.Text(get_text("New Conversation", "新建对话"))
860
- with ms.Slot("icon"):
861
- antd.Icon("PlusOutlined")
862
-
863
- # Conversations List
864
- with antdx.Conversations(
865
- elem_classes="chatbot-conversations-list", # Takes remaining space
866
- active_key="", # Start with no active key
867
- items=[] # Initial items empty, loaded by state
868
- ) as conversations:
869
- # Keep menu items definition
870
- with ms.Slot('menu.items'):
871
- with antd.Menu.Item(
872
- label="Delete", key="delete",
873
- danger=True
874
- ) as conversation_delete_menu_item:
875
- with ms.Slot("icon"):
876
- antd.Icon("DeleteOutlined")
877
- # Right Column
878
- with antd.Col(flex=1, # Takes remaining horizontal space
879
- elem_style=dict(height="100%"), # Ensure it fills vertically
880
- md=dict(span=24, order=0), # Adjust order for mobile if left col shown
881
- xs=dict(span=24, order=0),
882
- sm=dict(order=0)
883
- ):
884
- with antd.Flex(vertical=True,
885
- gap="small", # Gap between chatbot and sender
886
- elem_classes="chatbot-chat"): # This flex controls vertical layout of chat+input
887
- # Chatbot Display Area
888
- chatbot = pro.Chatbot(elem_classes="chatbot-chat-messages", # Takes flexible space
889
- # height=0, # Let flexbox control height
890
- value = None, # Initial value empty, loaded by state
891
- welcome_config=welcome_config(),
892
- user_config=user_config(),
893
- bot_config=bot_config())
894
-
895
- # Input Area (Sender)
896
- with antdx.Suggestion(
897
- items=DEFAULT_SUGGESTIONS,
898
- should_trigger="""(e, { onTrigger, onKeyDown }) => {
899
- // Keep existing JS logic for suggestions
900
- switch(e.key) {
901
- case '/': onTrigger(); break;
902
- case 'ArrowRight': case 'ArrowLeft': case 'ArrowUp': case 'ArrowDown': break;
903
- default: onTrigger(false);
904
- }
905
- onKeyDown(e);
906
- }""") as suggestion:
907
- with ms.Slot("children"):
908
- # Use elem_id for easier targeting if needed
909
- with antdx.Sender(elem_id="input-sender",
910
- placeholder=get_text(
911
- "Enter \"/\" to get suggestions, Shift+Enter for newline",
912
- "输入 \"/\" 获取提示,Shift+Enter 换行"),
913
- # interactive=True # Default is True
914
- ) as input:
915
- with ms.Slot("header"):
916
- # Pass AVAILABLE_MODEL_OPTIONS to SettingsHeader
917
- settings_header_state, settings_form = SettingsHeader(
918
- model_options=AVAILABLE_MODEL_OPTIONS, # Pass available options
919
- default_settings=DEFAULT_SETTINGS # Pass defaults
920
- )
921
- with ms.Slot("prefix"):
922
- with antd.Flex(
923
- gap=4,
924
- wrap=True, # Allow wrapping on small screens
925
- elem_style=dict(maxWidth='80vw') # Adjust max width
926
- ):
927
- with antd.Button(
928
- value=None, type="text"
929
- ) as setting_btn:
930
- with ms.Slot("icon"): antd.Icon("SettingOutlined")
931
- with antd.Button(
932
- value=None, type="text"
933
- ) as clear_btn:
934
- with ms.Slot("icon"): antd.Icon("ClearOutlined")
935
- # Keep ThinkingButton if UI uses it, otherwise remove
936
- thinking_btn_state = ThinkingButton()
937
-
938
- # --- Event Handlers ---
939
-
940
- # Browser State Handler (if enabled)
941
- if save_history:
942
- browser_state = gr.BrowserState(
943
- # Define the structure expected from the browser
944
- value={ "conversations": [], "conversation_contexts": {} },
945
- storage_key="vi_legal_chat_demo_storage" # Use a unique key
946
  )
947
- # When Python state changes, update the browser state
948
- state.change(fn=Gradio_Events.update_browser_state,
949
- inputs=[state],
950
- outputs=[browser_state],
951
- queue=False) # Run immediately
952
-
953
- # On page load, apply browser state to Python state and UI
954
- # Note: Ensure outputs match what apply_browser_state returns
955
- demo.load(fn=Gradio_Events.apply_browser_state,
956
- inputs=[browser_state, state],
957
- outputs=[conversations, chatbot, settings_form, state], # Outputs to update UI
958
- queue=False) # Run immediately on load
959
- elif not loaded_models:
960
- # If history saving is off AND no models loaded, show a message
961
- def show_no_model_warning():
962
- gr.Warning("No models were loaded successfully. The application functionality will be limited.")
963
- # You could also update a specific Gradio component to show the error
964
- demo.load(fn=show_no_model_warning, inputs=[], outputs=[])
965
-
966
 
967
- # Conversations Handler
968
- add_conversation_btn.click(fn=Gradio_Events.new_chat,
969
- inputs=[thinking_btn_state, state],
970
- outputs=[ # Match return order of new_chat
971
- conversations, chatbot, settings_form,
972
- thinking_btn_state, state
973
- ])
974
- conversations.active_change(fn=Gradio_Events.select_conversation,
975
- inputs=[thinking_btn_state, state],
976
- outputs=[ # Match return order of select_conversation
977
- conversations, chatbot, settings_form,
978
- thinking_btn_state, state
979
- ])
980
- conversations.menu_click(fn=Gradio_Events.click_conversation_menu,
981
- inputs=[state],
982
- outputs=[ # Match return order of click_conversation_menu
983
- conversations, chatbot, settings_form, state
984
- ],
985
- ) # queue=False ? Might be okay
986
-
987
- # Chatbot Handler
988
- chatbot.welcome_prompt_select(fn=Gradio_Events.apply_prompt,
989
- outputs=[input]) # Update input field
990
-
991
- # Use _js counterpart for direct manipulation if needed, otherwise rely on state change
992
- chatbot.delete(fn=Gradio_Events.delete_message,
993
- inputs=[state],
994
- outputs=[state]) # Only update state, UI will react
995
- chatbot.edit(fn=Gradio_Events.edit_message,
996
- inputs=[state, chatbot], # Pass chatbot value for content
997
- outputs=[state]) # Only update state
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
998
 
999
- # Regenerate uses the standard submit flow after trimming history
1000
- regenerating_event = chatbot.retry(
1001
- fn=Gradio_Events.regenerate_message,
1002
- inputs=[settings_form, thinking_btn_state, state],
1003
- outputs=[ # Outputs from preprocess, submit, and postprocess combined
1004
- input, conversations, add_conversation_btn, clear_btn,
1005
- conversation_delete_menu_item, setting_btn, chatbot, state
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1006
  ],
1007
- # Ensure outputs match the combined yields of the handler chain
1008
- )
1009
-
1010
 
1011
- # Input Handler
1012
- submit_event = input.submit(
1013
- fn=Gradio_Events.add_message,
1014
- inputs=[input,
1015
- settings_form, thinking_btn_state, state],
1016
- outputs=[ # Outputs from preprocess, submit, and postprocess combined
1017
- input, conversations, add_conversation_btn, clear_btn,
1018
- conversation_delete_menu_item, setting_btn, chatbot, state
1019
- ]) # Ensure outputs match yields
1020
 
1021
- # Cancel needs to unlock UI elements modified by preprocess
1022
- input.cancel(fn=Gradio_Events.cancel,
1023
- inputs=[state],
1024
- outputs=[ # Outputs matching postprocess_submit return dict keys
1025
- input, conversation_delete_menu_item, clear_btn,
1026
- conversations, add_conversation_btn, setting_btn, chatbot, state
1027
- ],
1028
- cancels=[submit_event, regenerating_event], # Cancel ongoing submit/regen
1029
- queue=False) # Run immediately
1030
-
1031
- # Input Actions Handler
1032
- setting_btn.click(fn=Gradio_Events.toggle_settings_header,
1033
- inputs=[settings_header_state],
1034
- outputs=[settings_header_state])
1035
- clear_btn.click(fn=Gradio_Events.clear_conversation_history,
1036
- inputs=[state],
1037
- outputs=[chatbot, state]) # Update chatbot display and state
1038
- suggestion.select(fn=Gradio_Events.select_suggestion,
1039
- inputs=[input],
1040
- outputs=[input]) # Update input field
1041
-
1042
- # --- Launch ---
1043
  if __name__ == "__main__":
1044
- if not loaded_models:
1045
- print("\nWARNING: No models loaded. Gradio app will launch but may not be functional.\n")
1046
- # Optionally prevent launch entirely:
1047
- # print("Exiting because no models were loaded.")
1048
- # exit()
1049
-
1050
- print("Launching Gradio Interface...")
1051
- demo.queue(default_concurrency_limit=10, # Adjust concurrency based on your GPU/CPU resources
1052
- max_size=20).launch(ssr_mode=False, # Consider True if SEO or initial load speed is critical
1053
- # share=True, # Uncomment to create a public link (use with caution)
1054
- # server_name="0.0.0.0" # Uncomment to allow access from network
1055
- max_threads=40 # Gradio default
1056
- )
 
 
 
 
 
1
  import torch
2
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
3
+ import gradio as gr
 
 
 
 
 
 
4
 
5
# Select the compute device: prefer the GPU when one is available.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# --- Model / tokenizer handles ---
# All slots start as None; the load blocks below fill them in on success,
# so downstream code can truth-test them to see which models are usable.
model_1 = None
tokenizer_1 = None
model_2 = None
tokenizer_2 = None
# model_3 / tokenizer_3 (ViLawT5_RL) are intentionally no longer loaded.
model_4 = None
tokenizer_4 = None
15
 
16
  # Sử dụng try-except để xử lý lỗi nếu không tải được mô hình
17
  try:
 
19
  print(f"Loading model: {model_name_1}...")
20
  model_1 = AutoModelForSeq2SeqLM.from_pretrained(model_name_1).to(device)
21
  tokenizer_1 = AutoTokenizer.from_pretrained(model_name_1)
 
22
  print(f"Model {model_name_1} loaded successfully.")
23
  except Exception as e:
24
  print(f"Error loading model {model_name_1}: {e}")
 
28
  print(f"Loading model: {model_name_2}...")
29
  model_2 = AutoModelForSeq2SeqLM.from_pretrained(model_name_2).to(device)
30
  tokenizer_2 = AutoTokenizer.from_pretrained(model_name_2)
 
31
  print(f"Model {model_name_2} loaded successfully.")
32
  except Exception as e:
33
  print(f"Error loading model {model_name_2}: {e}")
34
 
35
  # Bỏ qua việc tải model_3 (ViLawT5_RL)
36
+ # ... (phần code tải model_3 bị comment như cũ) ...
37
 
38
  try:
39
  model_name_4 = "sunbv56/V-LegalQA"
40
  print(f"Loading model: {model_name_4}...")
41
  model_4 = AutoModelForSeq2SeqLM.from_pretrained(model_name_4).to(device)
42
  tokenizer_4 = AutoTokenizer.from_pretrained(model_name_4)
 
43
  print(f"Model {model_name_4} loaded successfully.")
44
  except Exception as e:
45
  print(f"Error loading model {model_name_4}: {e}")
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
# --- Response generation ---
def chatbot_response(question, model_choice, max_new_tokens, temperature, top_k, top_p, repetition_penalty, use_early_stopping, use_do_sample):
    """Generate an answer for ``question`` with the selected seq2seq model.

    Parameters mirror the Gradio controls (model name, generation knobs).
    Returns the decoded answer string, or a human-readable ``"Error: ..."``
    message when the model is unavailable or generation fails — the UI
    displays whatever string is returned.
    """
    # Map the UI choice to its (model, tokenizer) pair; entries are None when
    # loading failed at startup. ViLawT5_RL (model_3) is intentionally omitted.
    registry = {
        "ViLawT5": (model_1, tokenizer_1),
        "ViT5": (model_2, tokenizer_2),
        "V-LegalQA": (model_4, tokenizer_4),
    }
    model, tokenizer = registry.get(model_choice, (None, None))

    if model is None or tokenizer is None:
        # Build the list of models that actually loaded so the error is actionable.
        available_models = [name for name, (mdl, tok) in registry.items() if mdl and tok]
        if not available_models:
            return "Error: No models were loaded successfully. Please check the logs."
        if model_choice not in available_models:
            return f"Error: Model '{model_choice}' was not loaded successfully or is invalid. Available models: {', '.join(available_models)}"
        # Defensive fallback: valid choice but missing model/tokenizer (unexpected).
        return f"Error: An unexpected issue occurred with model '{model_choice}'. Please check the logs."

    print(f"Generating response using {model_choice} with params: max_new_tokens={max_new_tokens}, temp={temperature}, top_k={top_k}, top_p={top_p}, rep_penalty={repetition_penalty}, early_stop={use_early_stopping}, do_sample={use_do_sample}")

    # Prompt prefix the models appear to have been fine-tuned with —
    # TODO(review): confirm against the training code.
    input_text = f"câu_hỏi: {question}"
    try:
        data = tokenizer(
            input_text,
            return_tensors="pt",
            truncation=True,
            return_attention_mask=True,
            add_special_tokens=True,
            padding="max_length",
            max_length=256,  # consider raising if questions/contexts are longer
        )

        input_ids = data.input_ids.to(device)
        attention_mask = data.attention_mask.to(device)

        # FIX: only forward sampling parameters when sampling is enabled.
        # Under greedy search (do_sample=False) transformers ignores
        # temperature/top_k/top_p and emits warnings, which confuses users
        # into thinking the sliders had an effect.
        gen_kwargs = dict(
            attention_mask=attention_mask,
            max_new_tokens=int(max_new_tokens),
            early_stopping=use_early_stopping,  # only meaningful with beam search; harmless here
            do_sample=use_do_sample,
            repetition_penalty=float(repetition_penalty),
        )
        if use_do_sample:
            gen_kwargs.update(
                temperature=float(temperature),
                top_k=int(top_k),
                top_p=float(top_p),
            )

        # Inference only — no gradients needed.
        with torch.no_grad():
            outputs = model.generate(input_ids, **gen_kwargs)

        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        print(f"Raw output shape: {outputs[0].shape}")  # extra shape log for debugging
        print(f"Decoded response: {response}")
        return response
    except Exception as e:
        print(f"Error during generation: {e}")
        # Print the full traceback for debugging.
        import traceback
        traceback.print_exc()
        return f"An error occurred during response generation: {e}"
126
+
127
# --- List of models that loaded successfully (ViLawT5_RL excluded) ---
_model_candidates = [
    ("ViLawT5", model_1, tokenizer_1),
    ("ViT5", model_2, tokenizer_2),
    ("V-LegalQA", model_4, tokenizer_4),
]
loaded_models = [name for name, mdl, tok in _model_candidates if mdl and tok]

# Default dropdown selection: prefer V-LegalQA, fall back to the first
# loaded model, or a placeholder string when nothing loaded at all.
if "V-LegalQA" in loaded_models:
    default_model = "V-LegalQA"
elif loaded_models:
    default_model = loaded_models[0]
else:
    default_model = "No models available"
135
+
136
+ # --- Tạo giao diện với Gradio ---
137
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
138
+ gr.Markdown(
139
+ """
140
+ # 🤖 AI Chatbot Pháp luật Việt Nam (Demo)
141
+ Chọn mô hình và đặt câu hỏi liên quan đến pháp luật.
142
+ Nhấn **Shift + Enter** để gửi câu hỏi, **Enter** để xuống dòng.
143
+ """
144
+ )
145
+
146
+ with gr.Row():
147
+ model_choice = gr.Dropdown(
148
+ choices=loaded_models,
149
+ label="Chọn Mô hình AI",
150
+ value=default_model,
151
+ interactive=bool(loaded_models) # Chỉ cho phép tương tác nếu có model
152
+ )
153
 
154
+ # Đảm bảo 'lines' >= 2 để Shift+Enter tác dụng rõ ràng
155
+ question_input = gr.Textbox(
156
+ label="Nhập câu hỏi của bạn (Shift+Enter để gửi)",
157
+ placeholder="Ví dụ: Thế nào là tội cố ý gây thương tích?",
158
+ lines=3, # Giữ nguyên hoặc tăng nếu muốn ô nhập cao hơn
159
+ # scale=7 # Ví dụ: làm cho ô nhập rộng hơn nếu cần
160
+ )
161
+
162
+ # --- Cập nhật giá trị mặc định trong Accordion ---
163
+ with gr.Accordion("Tùy chọn Nâng cao (Generation Parameters)", open=False):
164
+ with gr.Row():
165
+ early_stopping_checkbox = gr.Checkbox(label="Enable Early Stopping", value=False, info="Dừng sớm khi gặp token EOS.")
166
+ do_sample_checkbox = gr.Checkbox(label="Enable Sampling (do_sample)", value=False, info="Sử dụng sampling (cần thiết cho temperature, top_k, top_p). Tắt nếu muốn greedy search.")
167
+ with gr.Row():
168
+ max_new_tokens_slider = gr.Slider(minimum=10, maximum=1024, value=512, step=10, label="Max New Tokens", info="Số lượng token tối đa được sinh ra.")
169
+ temperature_slider = gr.Slider(minimum=0.1, maximum=2.0, value=1.0, step=0.1, label="Temperature", info="Độ 'sáng tạo' của câu trả lời (thấp hơn = bảo thủ hơn). Cần bật do_sample.")
170
+ with gr.Row():
171
+ top_k_slider = gr.Slider(minimum=1, maximum=200, value=50, step=1, label="Top-K", info="Chỉ xem xét K token có xác suất cao nhất. Cần bật do_sample.")
172
+ top_p_slider = gr.Slider(minimum=0.0, maximum=1.0, value=1.0, step=0.01, label="Top-P (Nucleus Sampling)", info="Chỉ xem xét các token có tổng xác suất >= P. Cần bật do_sample.")
173
+ repetition_penalty_slider = gr.Slider(minimum=1.0, maximum=3.0, value=1.0, step=0.1, label="Repetition Penalty", info="Phạt các token đã xuất hiện (cao hơn = ít lặp lại hơn).")
174
+
175
+
176
+ response_output = gr.Textbox(label="Phản hồi của Chatbot", lines=5, interactive=False)
177
+
178
+ # Nút gửi vẫn giữ lại phòng trường hợp người dùng thích click hơn
179
+ submit_btn = gr.Button("Gửi câu hỏi", variant="primary")
180
+
181
+ # --- THAY ĐỔI QUAN TRỌNG ---
182
+ # Tạo một list các inputs để dùng chung cho cả nút bấm và nhấn Enter
183
+ chatbot_inputs = [
184
+ question_input,
185
+ model_choice,
186
+ max_new_tokens_slider,
187
+ temperature_slider,
188
+ top_k_slider,
189
+ top_p_slider,
190
+ repetition_penalty_slider,
191
+ early_stopping_checkbox,
192
+ do_sample_checkbox
193
+ ]
194
+
195
+ # 1. Gửi khi nhấn nút
196
+ submit_btn.click(
197
+ fn=chatbot_response,
198
+ inputs=chatbot_inputs,
199
+ outputs=response_output
200
+ )
201
+
202
+ # 2. Gửi khi nhấn Enter trong Textbox question_input
203
+ # Shift+Enter sẽ tự động xuống dòng (hành vi mặc định khi lines > 1)
204
+ question_input.submit(
205
+ fn=chatbot_response,
206
+ inputs=chatbot_inputs,
207
+ outputs=response_output
208
+ )
209
+ # -----------------------------
210
+
211
+ gr.Examples(
212
+ examples=[
213
+ ["Hợp đồng vô hiệu khi nào?", "V-LegalQA"],
214
+ ["Quyền và nghĩa vụ của người lao động là gì?", "ViT5"],
215
+ ["Người dưới 18 tuổi có được ký hợp đồng lao động không?\nThời gian làm việc tối đa là bao lâu?", "V-LegalQA"] # Ví dụ multi-line
216
  ],
217
+ inputs=[question_input, model_choice]
218
+ )
 
219
 
 
 
 
 
 
 
 
 
 
220
 
221
# --- Run Gradio ---
def _main():
    """Entry point: warn when no model loaded, then start the Gradio app."""
    if not loaded_models:
        print("WARNING: No models were loaded successfully. The application might not function correctly.")
        # Consider also surfacing this inside the Blocks UI, e.g. gr.Info(...).
    # Set share=True to create a temporary public link.
    demo.launch(debug=True, share=False)


if __name__ == "__main__":
    _main()