Spaces:

kikikara
/

TUFA-Explainable_AI

Sleeping

App Files Files Community

kikikara committed on Jun 4, 2025

Commit

c9044fd

verified ·

1 Parent(s): d607281

Update app.py

Browse files

Files changed (1) hide show

app.py +145 -114

app.py CHANGED Viewed

@@ -6,41 +6,47 @@ import numpy as np
 import html
 from transformers import AutoTokenizer, AutoModel, logging as hf_logging
 import pandas as pd
-import matplotlib
-matplotlib.use('Agg') # Matplotlib 백엔드 설정 (Gradio와 함께 사용 시 중요)
 import matplotlib.pyplot as plt
 from sklearn.decomposition import PCA
-# --- 기존 설정 및 전역 모델 로드 부분 ---
-# Hugging Face Transformers 로깅 레벨 설정
 hf_logging.set_verbosity_error()
-# 설정
 MODEL_NAME = "bert-base-uncased"
 DEVICE     = "cpu"
-SAVE_DIR   = "저장저장1"
 LAYER_ID   = 4
 SEED       = 0
 CLF_NAME   = "linear"
-# 전역 모델 로드
 TOKENIZER_GLOBAL, MODEL_GLOBAL = None, None
 W_GLOBAL, MU_GLOBAL, W_P_GLOBAL, B_P_GLOBAL = None, None, None, None
-CLASS_NAMES_GLOBAL = None
 MODELS_LOADED_SUCCESSFULLY = False
 MODEL_LOADING_ERROR_MESSAGE = ""
 try:
-    print("Gradio App: 모델 로딩을 시작합니다...")
     lda_file_path = os.path.join(SAVE_DIR, f"lda_layer{LAYER_ID}_seed{SEED}.pkl")
     clf_file_path = os.path.join(SAVE_DIR, f"{CLF_NAME}_layer{LAYER_ID}_projlda_seed{SEED}.pkl")
     if not os.path.isdir(SAVE_DIR):
-        raise FileNotFoundError(f"오류: 모델 저장 디렉토리 '{SAVE_DIR}'를 찾을 수 없습니다.")
     if not os.path.exists(lda_file_path):
-        raise FileNotFoundError(f"오류: LDA 모델 파일 '{lda_file_path}'를 찾을 수 없습니다.")
     if not os.path.exists(clf_file_path):
-        raise FileNotFoundError(f"오류: 분류기 모델 파일 '{clf_file_path}'를 찾을 수 없습니다.")
     lda = joblib.load(lda_file_path)
     clf = joblib.load(clf_file_path)
@@ -57,84 +63,101 @@ try:
         MODEL_NAME, output_hidden_states=True, output_attentions=False
     ).to(DEVICE).eval()
-    if hasattr(lda, 'classes_'): CLASS_NAMES_GLOBAL = lda.classes_
-    elif hasattr(clf, 'classes_'): CLASS_NAMES_GLOBAL = clf.classes_
     MODELS_LOADED_SUCCESSFULLY = True
-    print("Gradio App: 모든 모델 및 데이터 로드 성공!")
 except Exception as e:
     MODELS_LOADED_SUCCESSFULLY = False
-    MODEL_LOADING_ERROR_MESSAGE = f"모델 로딩 중 심각한 오류 발생: {str(e)}\n'저장저장1' 폴더와 내용물을 확인해주세요."
     print(MODEL_LOADING_ERROR_MESSAGE)
-# 헬퍼 함수: PCA 시각화 (3D)
-def plot_token_pca_3d(token_embeddings_3d, tokens, scores, title="Token Embeddings 3D PCA (Colored by Importance)"):
-    fig = plt.figure(figsize=(10, 8))
-    ax = fig.add_subplot(111, projection='3d')
-    num_annotations = min(len(tokens), 15)
-    if len(scores) > 0 and len(tokens) > 0: # scores와 tokens가 비어있지 않은지 확인
-        # scores가 NumPy 배열이 아닐 수 있으므로, 리스트인 경우 np.array로 변환
-        scores_np_array = np.array(scores)
-        indices_to_annotate = np.argsort(scores_np_array)[-num_annotations:]
-    else:
-        indices_to_annotate = np.array([])
-    scatter = ax.scatter(token_embeddings_3d[:, 0], token_embeddings_3d[:, 1], token_embeddings_3d[:, 2],
-                         c=scores, cmap="coolwarm_r", s=50, alpha=0.8, depthshade=True)
-    for i in range(len(tokens)):
-        if i in indices_to_annotate:
-            ax.text(token_embeddings_3d[i, 0], token_embeddings_3d[i, 1], token_embeddings_3d[i, 2],
-                    f' {tokens[i]}', size=8, zorder=1, color='k')
-    ax.set_title(title, fontsize=14)
-    ax.set_xlabel("PCA Component 1", fontsize=10)
-    ax.set_ylabel("PCA Component 2", fontsize=10)
-    ax.set_zlabel("PCA Component 3", fontsize=10)
-    cbar = plt.colorbar(scatter, label="Importance Score", shrink=0.7)
-    cbar.ax.tick_params(labelsize=8)
-    ax.tick_params(axis='both', which='major', labelsize=8)
-    plt.tight_layout()
     return fig
-# ────────── 핵심 분석 함수 (반환 값 7개) ──────────
-def analyze_sentence_for_gradio(sentence_text, top_k_value):
-    def create_empty_plot(message="N/A"):
-        fig = plt.figure(figsize=(2,2));
-        ax = fig.add_subplot(111)
-        ax.text(0.5, 0.5, message, ha='center', va='center', fontsize=10)
-        ax.axis('off')
-        return fig
     if not MODELS_LOADED_SUCCESSFULLY:
-        error_html = f"<p style='color:red;'>초기화 오류: {html.escape(MODEL_LOADING_ERROR_MESSAGE)}</p>"
         empty_df = pd.DataFrame(columns=['token', 'score'])
-        empty_fig_placeholder = create_empty_plot()
-        return error_html, [], "모델 로딩 실패", {"오류":"모델 로딩 실패"}, [], empty_df, empty_fig_placeholder
     try:
         tokenizer, model = TOKENIZER_GLOBAL, MODEL_GLOBAL
         W, mu, w_p, b_p = W_GLOBAL, MU_GLOBAL, W_P_GLOBAL, B_P_GLOBAL
-        class_names = CLASS_NAMES_GLOBAL
         enc = tokenizer(sentence_text, return_tensors="pt", truncation=True, max_length=510, padding=True)
         input_ids, attn_mask  = enc["input_ids"].to(DEVICE), enc["attention_mask"].to(DEVICE)
         if input_ids.shape[1] == 0:
             empty_df = pd.DataFrame(columns=['token', 'score'])
-            empty_fig_placeholder = create_empty_plot()
-            return "<p style='color:orange;'>입력 오류: 유효한 토큰이 없습니다.</p>", [], "입력 오류", {"오류":"입력 오류"}, [], empty_df, empty_fig_placeholder
         input_embeds_detached = model.embeddings.word_embeddings(input_ids).clone().detach()
         input_embeds_for_grad = input_embeds_detached.clone().requires_grad_(True)
         outputs = model(inputs_embeds=input_embeds_for_grad, attention_mask=attn_mask,
                         output_hidden_states=True, output_attentions=False)
         cls_vec = outputs.hidden_states[LAYER_ID][:, 0, :]
         z_projected = (cls_vec - mu) @ W
@@ -146,14 +169,14 @@ def analyze_sentence_for_gradio(sentence_text, top_k_value):
         logit_output[0, pred_idx].backward()
         if input_embeds_for_grad.grad is None:
             empty_df = pd.DataFrame(columns=['token', 'score'])
-            empty_fig_placeholder = create_empty_plot()
-            return "<p style='color:red;'>분석 오류: 그래디언트 계산 실패.</p>", [],"분석 오류", {"오류":"분석 오류"}, [], empty_df, empty_fig_placeholder
         grads = input_embeds_for_grad.grad.clone().detach()
         scores = (grads * input_embeds_detached).norm(dim=2).squeeze(0)
         scores_np = scores.cpu().numpy()
-        valid_scores = scores_np[np.isfinite(scores_np)]
-        scores_np = scores_np / (valid_scores.max() + 1e-9) if len(valid_scores) > 0 and valid_scores.max() > 0 else np.zeros_like(scores_np)
         tokens_raw = tokenizer.convert_ids_to_tokens(input_ids[0], skip_special_tokens=False)
         actual_tokens = [tok for i, tok in enumerate(tokens_raw) if input_ids[0,i] != tokenizer.pad_token_id]
@@ -173,8 +196,8 @@ def analyze_sentence_for_gradio(sentence_text, top_k_value):
                 html_tokens_list.append(f"<span style='font-weight:bold;'>{html.escape(clean_tok_str)}</span>")
                 highlighted_text_data.append((clean_tok_str + " ", None))
             else:
-                color = f"rgba(255, 0, 0, {current_score_clipped:.2f})"
-                html_tokens_list.append(f"<span style='background-color:{color}; padding: 1px 2px; margin: 1px; border-radius: 3px; display:inline-block;'>{html.escape(clean_tok_str)}</span>")
                 highlighted_text_data.append((clean_tok_str + " ", round(current_score_clipped, 3)))
         html_output_str = " ".join(html_tokens_list).replace(" ##", "")
@@ -191,14 +214,12 @@ def analyze_sentence_for_gradio(sentence_text, top_k_value):
         barplot_df = pd.DataFrame(top_tokens_for_barplot_list) if top_tokens_for_barplot_list else pd.DataFrame(columns=['token', 'score'])
-        predicted_class_label_str = str(pred_idx)
-        if class_names is not None and 0 <= pred_idx < len(class_names):
-            predicted_class_label_str = str(class_names[pred_idx])
-        prediction_summary_text = f"클래스: {predicted_class_label_str}\n확률: {pred_prob_val:.3f}"
-        prediction_details_for_label = {"예측 클래스": predicted_class_label_str, "확률": f"{pred_prob_val:.3f}"}
-        pca_fig = create_empty_plot("PCA Plot N/A\n(Not enough non-special tokens for 3D)")
         non_special_token_indices = [idx for idx, token_id in enumerate(input_ids[0,:len(actual_tokens)].tolist())
                                      if token_id not in [cls_token_id, sep_token_id]]
@@ -206,12 +227,11 @@ def analyze_sentence_for_gradio(sentence_text, top_k_value):
             pca_tokens = [actual_tokens[i] for i in non_special_token_indices]
             if len(pca_tokens) > 0:
                 pca_embeddings = actual_input_embeds[non_special_token_indices, :]
-                pca_scores = actual_scores_np[non_special_token_indices]
                 pca = PCA(n_components=3, random_state=SEED)
                 token_embeddings_3d = pca.fit_transform(pca_embeddings)
-                # plt.close(pca_fig) # 이전 빈 그림 닫기
-                pca_fig = plot_token_pca_3d(token_embeddings_3d, pca_tokens, pca_scores)
         return (html_output_str, highlighted_text_data,
                 prediction_summary_text, prediction_details_for_label,
@@ -221,59 +241,71 @@ def analyze_sentence_for_gradio(sentence_text, top_k_value):
     except Exception as e:
         import traceback
         tb_str = traceback.format_exc()
-        error_html = f"<p style='color:red;'>분석 중 오류 발생: {html.escape(str(e))}</p><pre>{html.escape(tb_str)}</pre>"
-        print(f"Analyze_sentence_for_gradio error: {e}\n{tb_str}")
         empty_df = pd.DataFrame(columns=['token', 'score'])
-        empty_fig_placeholder = create_empty_plot("Error during plot generation")
-        return error_html, [], "분석 실패", {"오류": str(e)}, [], empty_df, empty_fig_placeholder
-# ────────── Gradio 인터페이스 정의 ──────────
-theme = gr.themes.Glass(primary_hue="blue", secondary_hue="cyan", neutral_hue="sky").set(
-    body_background_fill="linear-gradient(to right, #c9d6ff, #e2e2e2)",
-    block_background_fill="rgba(255,255,255,0.8)",
-    block_border_width="1px",
-    block_shadow="*shadow_drop_lg"
 )
-with gr.Blocks(title="AI 문장 분석기 XAI 🚀", theme=theme, css=".gradio-container {max-width: 98% !important;}") as demo:
-    gr.Markdown("# 🚀 AI 문장 분석기 XAI: 모델 해석 탐험")
-    gr.Markdown("BERT 모델 예측의 근거를 다양한 시각화 기법으로 탐색합니다. 토큰의 중요도와 임베딩 공간에서의 분포를 확인해보세요.")
     with gr.Row(equal_height=False):
-        with gr.Column(scale=1, min_width=300):
             with gr.Group():
-                gr.Markdown("### ✏️ 문장 입력 & 설정")
-                input_sentence = gr.Textbox(lines=5, label="분석할 영어 문장", placeholder="여기에 분석하고 싶은 영어 문장을 입력하세요...")
-                input_top_k = gr.Slider(minimum=1, maximum=10, value=5, step=1, label="Top-K 토큰 수")
-                submit_button = gr.Button("분석 시작 💫", variant="primary", scale=1)
         with gr.Column(scale=2):
-            with gr.Accordion("🎯 예측 결과", open=True):
-                output_prediction_summary = gr.Textbox(label="간단 요약", lines=2, interactive=False)
-                output_prediction_details = gr.Label(label="상세 정보")
-            with gr.Accordion("⭐ Top-K 중요 토큰 (표)", open=True):
-                output_top_tokens_df = gr.DataFrame(headers=["Token", "Score"], label="중요도 높은 토큰",
                                                     row_count=(1,"dynamic"), col_count=(2,"fixed"), interactive=False, wrap=True)
     with gr.Tabs() as tabs:
-        with gr.TabItem("🎨 HTML 하이라이트", id=0):
-            output_html_visualization = gr.HTML(label="토큰별 중요도 (Gradient x Input)")
-        with gr.TabItem("🖍️ 텍스트 하이라이트", id=1):
             output_highlighted_text = gr.HighlightedText(
-                label="중요도 기반 텍스트 하이라이트 (점수: 0~1)",
                 show_legend=True,
                 combine_adjacent=False
             )
-        with gr.TabItem("📊 Top-K 막대 그래프", id=2):
             output_top_tokens_barplot = gr.BarPlot(
-                label="Top-K 토큰 중요도",
                 x="token",
                 y="score",
-                tooltip=['token', 'score'], # SyntaxError 수정됨
-                min_width=300
             )
-        with gr.TabItem("🌐 토큰 임베딩 3D PCA", id=3):
-            output_pca_plot = gr.Plot(label="토큰 임베딩 3D PCA (중요도 색상)")
     gr.Markdown("---")
     gr.Examples(
@@ -290,10 +322,9 @@ with gr.Blocks(title="AI 문장 분석기 XAI 🚀", theme=theme, css=".gradio-c
             output_pca_plot
         ],
         fn=analyze_sentence_for_gradio,
-        cache_examples=False
     )
-    # gr.Markdown을 gr.HTML로 변경하여 HTML 태그 직접 사용
-    gr.HTML("<p style='text-align: center; color: #666;'>Explainable AI Demo with Gradio & Transformers</p>")
     submit_button.click(
         fn=analyze_sentence_for_gradio,
@@ -310,7 +341,7 @@ with gr.Blocks(title="AI 문장 분석기 XAI 🚀", theme=theme, css=".gradio-c
 if __name__ == "__main__":
     if not MODELS_LOADED_SUCCESSFULLY:
         print("*"*80)
-        print(f"경고: 모델 로딩 실패! {MODEL_LOADING_ERROR_MESSAGE}")
-        print("Gradio UI는 표시되지만 분석 기능이 제대로 작동하지 않습니다.")
         print("*"*80)
     demo.launch()

 import html
 from transformers import AutoTokenizer, AutoModel, logging as hf_logging
 import pandas as pd
+import matplotlib # Still used for a basic empty plot if Plotly one is too complex for that
+matplotlib.use('Agg') # Matplotlib backend setting
 import matplotlib.pyplot as plt
 from sklearn.decomposition import PCA
+import plotly.graph_objects as go # For interactive 3D PCA plot
+# --- Global Settings and Model Loading ---
 hf_logging.set_verbosity_error()
 MODEL_NAME = "bert-base-uncased"
 DEVICE     = "cpu"
+SAVE_DIR   = "저장저장1" # This folder name is from your setup
 LAYER_ID   = 4
 SEED       = 0
 CLF_NAME   = "linear"
+# Class label mapping provided by user
+CLASS_LABEL_MAP = {
+    0: "World",
+    1: "Sports",
+    2: "Business",
+    3: "Sci/Tech"
+}
 TOKENIZER_GLOBAL, MODEL_GLOBAL = None, None
 W_GLOBAL, MU_GLOBAL, W_P_GLOBAL, B_P_GLOBAL = None, None, None, None
+# CLASS_NAMES_GLOBAL = None # We'll use CLASS_LABEL_MAP instead for clarity
 MODELS_LOADED_SUCCESSFULLY = False
 MODEL_LOADING_ERROR_MESSAGE = ""
 try:
+    print("Gradio App: Initializing model loading...")
     lda_file_path = os.path.join(SAVE_DIR, f"lda_layer{LAYER_ID}_seed{SEED}.pkl")
     clf_file_path = os.path.join(SAVE_DIR, f"{CLF_NAME}_layer{LAYER_ID}_projlda_seed{SEED}.pkl")
     if not os.path.isdir(SAVE_DIR):
+        raise FileNotFoundError(f"Error: Model storage directory '{SAVE_DIR}' not found.")
     if not os.path.exists(lda_file_path):
+        raise FileNotFoundError(f"Error: LDA model file '{lda_file_path}' not found.")
     if not os.path.exists(clf_file_path):
+        raise FileNotFoundError(f"Error: Classifier model file '{clf_file_path}' not found.")
     lda = joblib.load(lda_file_path)
     clf = joblib.load(clf_file_path)
         MODEL_NAME, output_hidden_states=True, output_attentions=False
     ).to(DEVICE).eval()
     MODELS_LOADED_SUCCESSFULLY = True
+    print("Gradio App: All models and data loaded successfully!")
 except Exception as e:
     MODELS_LOADED_SUCCESSFULLY = False
+    MODEL_LOADING_ERROR_MESSAGE = f"Critical error during model loading: {str(e)}\nPlease ensure the '{SAVE_DIR}' folder and its contents are correct."
     print(MODEL_LOADING_ERROR_MESSAGE)
def _labels_for_top_scores(tokens, scores_array, max_labels):
    """Return one label per token, blank except for the highest-scoring ones."""
    labels = [''] * len(tokens)
    keep = min(len(tokens), max_labels)
    # Guard explicitly: a slice of [-0:] would select *every* index.
    if keep <= 0 or len(scores_array) == 0:
        return labels
    for idx in np.argsort(scores_array)[-keep:]:
        # scores may be longer than tokens; ignore indices with no token.
        if idx < len(tokens):
            labels[idx] = tokens[idx]
    return labels


def plot_token_pca_3d_plotly(token_embeddings_3d, tokens, scores,
                             title="Token Embeddings 3D PCA (Colored by Importance)",
                             max_annotations=20):
    """Build an interactive Plotly 3D scatter of PCA-projected token embeddings.

    Args:
        token_embeddings_3d: 2-D array indexed as ``[:, 0]``, ``[:, 1]``,
            ``[:, 2]`` for the x, y and z coordinates of each token.
        tokens: Sequence of token strings, one per row of the embeddings.
        scores: Per-token importance scores; flattened to 1-D and used for
            marker colour and hover text.
        title: Figure title.
        max_annotations: Maximum number of highest-scoring tokens that get a
            visible text label (all tokens still show hover text).

    Returns:
        A ``plotly.graph_objects.Figure`` containing a single Scatter3d trace.
    """
    # Plotly needs a flat numeric array for marker colours.
    scores_array = np.array(scores).flatten()

    # Label only the most important tokens to keep the plot readable.
    text_annotations = _labels_for_top_scores(tokens, scores_array, max_annotations)

    def _axis(label):
        # Nested title.font replaces the deprecated `titlefont` attribute,
        # which newer Plotly releases reject.
        return dict(
            title=dict(text=label, font=dict(size=10)),
            tickfont=dict(size=9),
            backgroundcolor="rgba(230, 230, 230, 0.8)",
        )

    hover_labels = [
        f"Token: {tok}<br>Score: {val:.3f}"
        for tok, val in zip(tokens, scores_array)
    ]

    trace = go.Scatter3d(
        x=token_embeddings_3d[:, 0],
        y=token_embeddings_3d[:, 1],
        z=token_embeddings_3d[:, 2],
        mode='markers+text',
        text=text_annotations,
        textfont=dict(size=9, color='#333333'),
        textposition='top center',
        marker=dict(
            size=6,
            color=scores_array,
            colorscale='RdBu',
            reversescale=True,  # red = high score, blue = low score
            opacity=0.8,
            colorbar=dict(
                title=dict(text='Importance'),
                tickfont=dict(size=9),
                len=0.75,
                yanchor='middle',
            ),
        ),
        hoverinfo='text',
        hovertext=hover_labels,
    )

    fig = go.Figure(data=[trace])
    fig.update_layout(
        title=dict(text=title, x=0.5, font=dict(size=16)),
        scene=dict(
            xaxis=_axis('PCA Comp 1'),
            yaxis=_axis('PCA Comp 2'),
            zaxis=_axis('PCA Comp 3'),
            bgcolor="rgba(255, 255, 255, 0.95)",
            # Initial camera angle, spelled out as a nested dict.
            camera=dict(eye=dict(x=1.5, y=1.5, z=0.5)),
        ),
        margin=dict(l=5, r=5, b=5, t=45),
        paper_bgcolor='rgba(0,0,0,0)',  # transparent paper background
    )
    return fig
def create_empty_plotly_figure(message="N/A"):
    """Return a blank Plotly figure showing ``message`` centred in grey text.

    Used as a placeholder wherever a plot output is required but no real
    plot can be drawn.

    Args:
        message: Text to display. Newline characters are converted to
            ``<br>`` because Plotly annotation text does not break on
            ``\\n``.

    Returns:
        A ``plotly.graph_objects.Figure`` with hidden axes, a transparent
        background and a fixed height of 300.
    """
    display_text = str(message).replace("\r\n", "<br>").replace("\n", "<br>")
    fig = go.Figure()
    fig.add_annotation(
        text=display_text,
        xref="paper",
        yref="paper",
        x=0.5,
        y=0.5,
        showarrow=False,
        font=dict(size=12, color="grey"),
    )
    fig.update_layout(
        xaxis={'visible': False},
        yaxis={'visible': False},
        height=300,  # fixed height so the empty placeholder does not collapse
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
    )
    return fig
+# --- Core Analysis Function (returns 7 items for Gradio UI) ---
+def analyze_sentence_for_gradio(sentence_text, top_k_value):
     if not MODELS_LOADED_SUCCESSFULLY:
+        error_html = f"<p style='color:red;'>Initialization Error: {html.escape(MODEL_LOADING_ERROR_MESSAGE)}</p>"
         empty_df = pd.DataFrame(columns=['token', 'score'])
+        empty_fig = create_empty_plotly_figure("Model Loading Failed")
+        return error_html, [], "Model Loading Failed", {"Error":"Model Loading Failed"}, [], empty_df, empty_fig
     try:
         tokenizer, model = TOKENIZER_GLOBAL, MODEL_GLOBAL
         W, mu, w_p, b_p = W_GLOBAL, MU_GLOBAL, W_P_GLOBAL, B_P_GLOBAL
         enc = tokenizer(sentence_text, return_tensors="pt", truncation=True, max_length=510, padding=True)
         input_ids, attn_mask  = enc["input_ids"].to(DEVICE), enc["attention_mask"].to(DEVICE)
         if input_ids.shape[1] == 0:
             empty_df = pd.DataFrame(columns=['token', 'score'])
+            empty_fig = create_empty_plotly_figure("Invalid Input")
+            return "<p style='color:orange;'>Input Error: No valid tokens found.</p>", [], "Input Error", {"Error":"Input Error"}, [], empty_df, empty_fig
         input_embeds_detached = model.embeddings.word_embeddings(input_ids).clone().detach()
         input_embeds_for_grad = input_embeds_detached.clone().requires_grad_(True)
         outputs = model(inputs_embeds=input_embeds_for_grad, attention_mask=attn_mask,
                         output_hidden_states=True, output_attentions=False)
         cls_vec = outputs.hidden_states[LAYER_ID][:, 0, :]
         z_projected = (cls_vec - mu) @ W
         logit_output[0, pred_idx].backward()
         if input_embeds_for_grad.grad is None:
             empty_df = pd.DataFrame(columns=['token', 'score'])
+            empty_fig = create_empty_plotly_figure("Gradient Error")
+            return "<p style='color:red;'>Analysis Error: Gradient calculation failed.</p>", [],"Analysis Error", {"Error":"Analysis Error"}, [], empty_df, empty_fig
         grads = input_embeds_for_grad.grad.clone().detach()
         scores = (grads * input_embeds_detached).norm(dim=2).squeeze(0)
         scores_np = scores.cpu().numpy()
+        valid_scores_for_norm = scores_np[np.isfinite(scores_np)] # Renamed to avoid conflict
+        scores_np = scores_np / (valid_scores_for_norm.max() + 1e-9) if len(valid_scores_for_norm) > 0 and valid_scores_for_norm.max() > 0 else np.zeros_like(scores_np)
         tokens_raw = tokenizer.convert_ids_to_tokens(input_ids[0], skip_special_tokens=False)
         actual_tokens = [tok for i, tok in enumerate(tokens_raw) if input_ids[0,i] != tokenizer.pad_token_id]
                 html_tokens_list.append(f"<span style='font-weight:bold;'>{html.escape(clean_tok_str)}</span>")
                 highlighted_text_data.append((clean_tok_str + " ", None))
             else:
+                color = f"rgba(220, 50, 50, {current_score_clipped:.2f})" # Slightly adjusted red
+                html_tokens_list.append(f"<span style='background-color:{color}; color:white; padding: 1px 3px; margin: 1px; border-radius: 4px; display:inline-block;'>{html.escape(clean_tok_str)}</span>")
                 highlighted_text_data.append((clean_tok_str + " ", round(current_score_clipped, 3)))
         html_output_str = " ".join(html_tokens_list).replace(" ##", "")
         barplot_df = pd.DataFrame(top_tokens_for_barplot_list) if top_tokens_for_barplot_list else pd.DataFrame(columns=['token', 'score'])
+        predicted_class_label_str = CLASS_LABEL_MAP.get(pred_idx, f"Unknown Index: {pred_idx}")
+        prediction_summary_text = f"Predicted Class: {predicted_class_label_str}\nProbability: {pred_prob_val:.3f}"
+        prediction_details_for_label = {"Predicted Class": predicted_class_label_str, "Probability": f"{pred_prob_val:.3f}"}
+        pca_fig = create_empty_plotly_figure("PCA Plot N/A\n(Not enough non-special tokens for 3D)")
         non_special_token_indices = [idx for idx, token_id in enumerate(input_ids[0,:len(actual_tokens)].tolist())
                                      if token_id not in [cls_token_id, sep_token_id]]
             pca_tokens = [actual_tokens[i] for i in non_special_token_indices]
             if len(pca_tokens) > 0:
                 pca_embeddings = actual_input_embeds[non_special_token_indices, :]
+                pca_scores_for_plot = actual_scores_np[non_special_token_indices] # Use this for coloring
                 pca = PCA(n_components=3, random_state=SEED)
                 token_embeddings_3d = pca.fit_transform(pca_embeddings)
+                pca_fig = plot_token_pca_3d_plotly(token_embeddings_3d, pca_tokens, pca_scores_for_plot)
         return (html_output_str, highlighted_text_data,
                 prediction_summary_text, prediction_details_for_label,
     except Exception as e:
         import traceback
         tb_str = traceback.format_exc()
+        error_html = f"<p style='color:red;'>Analysis Error: {html.escape(str(e))}</p><pre>{html.escape(tb_str)}</pre>"
+        print(f"analyze_sentence_for_gradio error: {e}\n{tb_str}")
         empty_df = pd.DataFrame(columns=['token', 'score'])
+        empty_fig = create_empty_plotly_figure("Analysis Error")
+        return error_html, [], "Analysis Failed", {"Error": str(e)}, [], empty_df, empty_fig
+# --- Gradio UI Definition (Translated and Enhanced) ---
+# Using a built-in theme and some CSS for aesthetics
+theme = gr.themes.Monochrome(
+    primary_hue=gr.themes.colors.blue,
+    secondary_hue=gr.themes.colors.sky,
+    neutral_hue=gr.themes.colors.slate
+).set(
+    body_background_fill="#f0f2f6",
+    block_shadow="*shadow_drop_lg",
+    button_primary_background_fill="*primary_500",
+    button_primary_text_color="white",
 )
+with gr.Blocks(title="AI Sentence Analyzer XAI 🚀", theme=theme, css=".gradio-container {max-width: 98% !important;}") as demo:
+    gr.Markdown("# 🚀 AI Sentence Analyzer XAI: Exploring Model Explanations")
+    gr.Markdown("Analyze English sentences to understand BERT model predictions through various XAI visualization techniques. "
+                "Explore token importance and their distribution in the embedding space.")
     with gr.Row(equal_height=False):
+        with gr.Column(scale=1, min_width=350): # Increased min_width slightly
             with gr.Group():
+                gr.Markdown("### ✏️ Input Sentence & Settings")
+                input_sentence = gr.Textbox(lines=5, label="English Sentence to Analyze", placeholder="Enter the English sentence you want to analyze here...")
+                input_top_k = gr.Slider(minimum=1, maximum=10, value=5, step=1, label="Number of Top-K Tokens")
+                submit_button = gr.Button("Analyze Sentence 💫", variant="primary")
         with gr.Column(scale=2):
+            with gr.Accordion("🎯 Prediction Outcome", open=True):
+                output_prediction_summary = gr.Textbox(label="Prediction Summary", lines=2, interactive=False)
+                output_prediction_details = gr.Label(label="Detailed Prediction")
+            with gr.Accordion("⭐ Top-K Important Tokens (Table)", open=True):
+                output_top_tokens_df = gr.DataFrame(headers=["Token", "Score"], label="Most Important Tokens",
                                                     row_count=(1,"dynamic"), col_count=(2,"fixed"), interactive=False, wrap=True)
     with gr.Tabs() as tabs:
+        with gr.TabItem("🎨 HTML Highlight (Custom)", id=0):
+            output_html_visualization = gr.HTML(label="Token Importance (Gradient x Input based)")
+        with gr.TabItem("🖍️ Highlighted Text (Gradio)", id=1):
             output_highlighted_text = gr.HighlightedText(
+                label="Token Importance (Score: 0-1)",
                 show_legend=True,
+                # Color map can be more sophisticated if scores are categorical
+                # For numerical scores (0-1), Gradio tries to infer intensity.
+                # Example color map (if scores were categories like "LOW", "MEDIUM", "HIGH"):
+                # color_map={"LOW": "lightblue", "MEDIUM": "lightgreen", "HIGH": "pink"},
                 combine_adjacent=False
             )
+        with gr.TabItem("📊 Top-K Bar Plot", id=2):
             output_top_tokens_barplot = gr.BarPlot(
+                label="Top-K Token Importance Scores",
                 x="token",
                 y="score",
+                tooltip=['token', 'score'],
+                min_width=300,
+                # title="Top-K Most Important Tokens" # BarPlot may not have a direct title prop
             )
+        with gr.TabItem("🌐 Token Embeddings 3D PCA (Interactive)", id=3):
+            output_pca_plot = gr.Plot(label="3D PCA of Token Embeddings (Colored by Importance Score)")
     gr.Markdown("---")
     gr.Examples(
             output_pca_plot
         ],
         fn=analyze_sentence_for_gradio,
+        cache_examples=False # Set to True for faster loading of examples if inputs/outputs are static
     )
+    gr.HTML("<p style='text-align: center; color: #4a5568;'>Explainable AI Demo powered by Gradio & Hugging Face Transformers</p>")
     submit_button.click(
         fn=analyze_sentence_for_gradio,
 if __name__ == "__main__":
     if not MODELS_LOADED_SUCCESSFULLY:
         print("*"*80)
+        print(f"WARNING: Models failed to load! {MODEL_LOADING_ERROR_MESSAGE}")
+        print("The Gradio UI will be displayed, but analysis will fail.")
         print("*"*80)
     demo.launch()