Bram van Es committed
Commit 5d8e89e · Parent(s): 3c38d1e

update for simple viewer

Files changed (4)
  1. app.py +125 -120
  2. config.py +113 -0
  3. launch.py +48 -0
  4. run.py +181 -0
app.py CHANGED
@@ -18,27 +18,33 @@ except ImportError:
 }
 MODEL_SETTINGS = {"max_length": 512}
 VIZ_SETTINGS = {
-    "max_perplexity_display": 100.0,
+    "max_perplexity_display": 50.0,
     "color_scheme": {
-        "high_perplexity": {"r": 255, "g": 0, "b": 50},
-        "low_perplexity": {"r": 0, "g": 255, "b": 50}
+        "low_perplexity": {"r": 46, "g": 204, "b": 113},
+        "medium_perplexity": {"r": 241, "g": 196, "b": 15},
+        "high_perplexity": {"r": 231, "g": 76, "b": 60},
+        "background_alpha": 0.7,
+        "border_alpha": 0.9
+    },
+    "thresholds": {
+        "low_threshold": 0.3,
+        "high_threshold": 0.7
     },
     "displacy_options": {"ents": ["PP"], "colors": {}}
 }
 PROCESSING_SETTINGS = {
     "default_iterations": 1,
     "max_iterations": 10,
-    "default_mlm_probability": 0.15,
-    "min_mlm_probability": 0.1,
-    "max_mlm_probability": 0.5,
     "epsilon": 1e-10
 }
 UI_SETTINGS = {
-    "title": "📈 Perplexity Viewer",
-    "description": "Visualize per-token perplexity using color gradients.",
+    "title": "📈 Perplexity Viewer Simple",
+    "description": "Visualize per-token perplexity using color gradients. Assumes single token masking.",
     "examples": [
-        {"text": "The quick brown fox jumps over the lazy dog.", "model": "gpt2", "type": "decoder", "iterations": 1, "mlm_prob": 0.15},
-        {"text": "The capital of France is Paris.", "model": "bert-base-uncased", "type": "encoder", "iterations": 1, "mlm_prob": 0.15}
+        {"text": "The quick brown fox jumps over the lazy dog.", "model": "gpt2", "type": "decoder", "iterations": 1},
+        {"text": "The capital of France is Paris.", "model": "bert-base-uncased", "type": "encoder", "iterations": 1},
+        {"text": "Quantum entanglement defies classical physics intuition completely.", "model": "distilgpt2", "type": "decoder", "iterations": 1},
+        {"text": "Machine learning algorithms require computational resources.", "model": "distilbert-base-uncased", "type": "encoder", "iterations": 1}
     ]
 }
 ERROR_MESSAGES = {
@@ -138,8 +144,8 @@ def calculate_decoder_perplexity(text, model, tokenizer, iterations=1):
 
     return np.mean(perplexities), cleaned_tokens, token_perplexities
 
-def calculate_encoder_perplexity(text, model, tokenizer, mlm_probability=0.15, iterations=1):
-    """Calculate pseudo-perplexity for encoder models (like BERT) using MLM"""
+def calculate_encoder_perplexity(text, model, tokenizer, iterations=1):
+    """Calculate pseudo-perplexity for encoder models (like BERT) using MLM on all tokens"""
     device = next(model.parameters()).device
 
     perplexities = []
@@ -152,48 +158,32 @@ def calculate_encoder_perplexity(text, model, tokenizer, mlm_probability=0.15, i
         if input_ids.size(1) < 3:  # Need at least [CLS] + 1 token + [SEP]
             raise gr.Error("Text is too short for MLM perplexity calculation.")
 
-        # Create a copy for masking
-        masked_input_ids = input_ids.clone()
-        original_tokens = input_ids.clone()
-
-        # Randomly mask tokens (excluding special tokens)
-        seq_length = input_ids.size(1)
-        mask_indices = []
-        special_token_ids = {tokenizer.cls_token_id, tokenizer.sep_token_id, tokenizer.pad_token_id}
-
-        for i in range(seq_length):
-            if input_ids[0, i].item() not in special_token_ids:
-                if torch.rand(1).item() < mlm_probability:
-                    mask_indices.append(i)
-                    masked_input_ids[0, i] = tokenizer.mask_token_id
-
-        if not mask_indices:
-            # If no tokens were masked, mask at least one non-special token
-            non_special_indices = [i for i in range(seq_length) if input_ids[0, i].item() not in special_token_ids]
-            if non_special_indices:
-                mask_idx = torch.randint(0, len(non_special_indices), (1,)).item()
-                mask_indices = [non_special_indices[mask_idx]]
-                masked_input_ids[0, mask_indices[0]] = tokenizer.mask_token_id
-
+        # Calculate average perplexity by masking all content tokens
         with torch.no_grad():
-            outputs = model(masked_input_ids)
-            predictions = outputs.logits
-
-            # Calculate perplexity for masked tokens
-            masked_token_losses = []
-            for idx in mask_indices:
-                target_id = original_tokens[0, idx]
-                pred_scores = predictions[0, idx]
-                prob = F.softmax(pred_scores, dim=-1)[target_id]
-                loss = -torch.log(prob + PROCESSING_SETTINGS["epsilon"])
-                masked_token_losses.append(loss.item())
-
-            if masked_token_losses:
-                avg_loss = np.mean(masked_token_losses)
+            seq_length = input_ids.size(1)
+            special_token_ids = {tokenizer.cls_token_id, tokenizer.sep_token_id, tokenizer.pad_token_id}
+
+            all_token_losses = []
+
+            # Mask each non-special token individually and calculate loss
+            for i in range(seq_length):
+                if input_ids[0, i].item() not in special_token_ids:
+                    masked_input = input_ids.clone()
+                    original_token_id = input_ids[0, i]
+                    masked_input[0, i] = tokenizer.mask_token_id
+
+                    outputs = model(masked_input)
+                    predictions = outputs.logits[0, i]
+                    prob = F.softmax(predictions, dim=-1)[original_token_id]
+                    loss = -torch.log(prob + PROCESSING_SETTINGS["epsilon"])
+                    all_token_losses.append(loss.item())
+
+            if all_token_losses:
+                avg_loss = np.mean(all_token_losses)
                 perplexity = math.exp(avg_loss)
                 perplexities.append(perplexity)
 
-        # Calculate per-token pseudo-perplexity for visualization
+        # Calculate per-token pseudo-perplexity for visualization (analyze all tokens)
         with torch.no_grad():
             token_perplexities = []
             tokens = tokenizer.convert_ids_to_tokens(input_ids[0])
@@ -203,6 +193,7 @@ def calculate_encoder_perplexity(text, model, tokenizer, mlm_probability=0.15, i
             if input_ids[0, i].item() in special_token_ids:
                 token_perplexities.append(1.0)  # Low perplexity for special tokens
             else:
+                # Calculate detailed perplexity for every content token
                 masked_input = input_ids.clone()
                 original_token_id = input_ids[0, i]
                 masked_input[0, i] = tokenizer.mask_token_id
@@ -224,7 +215,7 @@ def calculate_encoder_perplexity(text, model, tokenizer, mlm_probability=0.15, i
     return np.mean(perplexities) if perplexities else float('inf'), cleaned_tokens, np.array(token_perplexities)
 
 def create_visualization(tokens, perplexities):
-    """Create displaCy visualization with color-coded perplexities"""
+    """Create custom HTML visualization with color-coded perplexities"""
     if len(tokens) == 0:
         return "<p>No tokens to visualize.</p>"
 
@@ -234,10 +225,17 @@ def create_visualization(tokens, perplexities):
     # Normalize perplexities to 0-1 range for color mapping
     normalized_perplexities = np.clip(perplexities / max_perplexity, 0, 1)
 
-    # Create entities for displaCy
-    entities = []
-    text_parts = []
-    current_pos = 0
+    # Create HTML with inline styles for color coding
+    html_parts = [
+        '<div style="font-family: Arial, sans-serif; font-size: 16px; line-height: 1.8; padding: 20px; border: 1px solid #ddd; border-radius: 8px; background-color: #fafafa;">',
+        '<h3 style="margin-top: 0; color: #333;">Per-token Perplexity Visualization</h3>',
+        '<div style="margin-bottom: 15px;">',
+        '<span style="font-size: 12px; color: #666;">',
+        '🟢 Low perplexity (confident) → 🟡 Medium → 🔴 High perplexity (uncertain)',
+        '</span>',
+        '</div>',
+        '<div style="line-height: 2.0;">'
+    ]
 
     for i, (token, perp, norm_perp) in enumerate(zip(tokens, perplexities, normalized_perplexities)):
         # Skip empty tokens
@@ -245,57 +243,82 @@ def create_visualization(tokens, perplexities):
             continue
 
         # Clean token for display
-        clean_token = token.replace("</w>", "").strip()
+        clean_token = token.replace("</w>", "").replace("##", "").strip()
         if not clean_token:
             continue
 
-        # Add space before token if it's not the first one and doesn't start with punctuation
+        # Add space before token if needed
         if i > 0 and not clean_token[0] in ".,!?;:":
-            text_parts.append(" ")
-            current_pos += 1
+            html_parts.append(" ")
 
-        text_parts.append(clean_token)
+        # Get color thresholds from configuration
+        low_thresh = VIZ_SETTINGS.get("thresholds", {}).get("low_threshold", 0.3)
+        high_thresh = VIZ_SETTINGS.get("thresholds", {}).get("high_threshold", 0.7)
 
-        # Map perplexity to color
-        high_color = VIZ_SETTINGS["color_scheme"]["high_perplexity"]
+        # Get colors from configuration
         low_color = VIZ_SETTINGS["color_scheme"]["low_perplexity"]
+        med_color = VIZ_SETTINGS["color_scheme"]["medium_perplexity"]
+        high_color = VIZ_SETTINGS["color_scheme"]["high_perplexity"]
 
-        red = int(high_color["r"] * norm_perp + low_color["r"] * (1 - norm_perp))
-        green = int(high_color["g"] * norm_perp + low_color["g"] * (1 - norm_perp))
-        blue = int(high_color["b"] * norm_perp + low_color["b"] * (1 - norm_perp))
-
-        color = f"rgb({red}, {green}, {blue})"
-
-        entities.append({
-            "start": current_pos,
-            "end": current_pos + len(clean_token),
-            "label": f"{perp:.2f}",
-            "color": color
-        })
-
-        current_pos += len(clean_token)
-
-    # Join text parts
-    text = "".join(text_parts)
-
-    if not entities:
-        return "<p>No valid tokens found for visualization.</p>"
+        # Map perplexity to color using configuration
+        if norm_perp < low_thresh:  # Low perplexity - green
+            # Interpolate between green and yellow
+            factor = norm_perp / low_thresh
+            red = int(low_color["r"] + factor * (med_color["r"] - low_color["r"]))
+            green = int(low_color["g"] + factor * (med_color["g"] - low_color["g"]))
+            blue = int(low_color["b"] + factor * (med_color["b"] - low_color["b"]))
+        elif norm_perp < high_thresh:  # Medium perplexity - yellow/orange
+            # Interpolate between yellow and red
+            factor = (norm_perp - low_thresh) / (high_thresh - low_thresh)
+            red = int(med_color["r"] + factor * (high_color["r"] - med_color["r"]))
+            green = int(med_color["g"] + factor * (high_color["g"] - med_color["g"]))
+            blue = int(med_color["b"] + factor * (high_color["b"] - med_color["b"]))
+        else:  # High perplexity - red
+            # Use high perplexity color, potentially darker for very high values
+            factor = min((norm_perp - high_thresh) / (1.0 - high_thresh), 1.0)
+            darken = 0.8 - (factor * 0.3)  # Darken by up to 30%
+            red = int(high_color["r"] * darken)
+            green = int(high_color["g"] * darken)
+            blue = int(high_color["b"] * darken)
+
+        tooltip_text = f"Perplexity: {perp:.3f} (normalized: {norm_perp:.3f})"
+
+        # Clamp values
+        red = max(0, min(255, red))
+        green = max(0, min(255, green))
+        blue = max(0, min(255, blue))
+
+        # Get alpha values from configuration
+        bg_alpha = VIZ_SETTINGS["color_scheme"].get("background_alpha", 0.7)
+        border_alpha = VIZ_SETTINGS["color_scheme"].get("border_alpha", 0.9)
+
+        # Create colored span with tooltip
+        html_parts.append(
+            f'<span style="'
+            f'background-color: rgba({red}, {green}, {blue}, {bg_alpha}); '
+            f'color: #000; '
+            f'padding: 2px 4px; '
+            f'margin: 1px; '
+            f'border-radius: 3px; '
+            f'border: 1px solid rgba({red}, {green}, {blue}, {border_alpha}); '
+            f'font-weight: 500; '
+            f'cursor: help; '
+            f'display: inline-block;'
+            f'" title="{tooltip_text}">{clean_token}</span>'
+        )
 
-    # Create displaCy data structure
-    doc_data = {
-        "text": text,
-        "ents": entities,
-        "title": "Per-token Perplexity Visualization"
-    }
+    html_parts.extend([
+        '</div>',
+        '<div style="margin-top: 15px; font-size: 12px; color: #666;">',
+        f'Max perplexity in visualization: {max_perplexity:.2f} | ',
+        f'Total tokens: {len(tokens)}',
+        '</div>',
+        '</div>'
+    ])
 
-    try:
-        # Generate HTML
-        html = displacy.render(doc_data, style="ent", manual=True, options=VIZ_SETTINGS["displacy_options"])
-        return html
-    except Exception as e:
-        return f"<p>Error creating visualization: {str(e)}</p>"
+    return "".join(html_parts)
 
-def process_text(text, model_name, model_type, iterations, mlm_probability):
+def process_text(text, model_name, model_type, iterations):
     """Main processing function"""
     if not text.strip():
         return ERROR_MESSAGES["empty_text"], "", pd.DataFrame()
@@ -303,8 +326,6 @@ def process_text(text, model_name, model_type, iterations, mlm_probability):
     try:
         # Validate inputs
         iterations = max(1, min(iterations, PROCESSING_SETTINGS["max_iterations"]))
-        mlm_probability = max(PROCESSING_SETTINGS["min_mlm_probability"],
-                              min(mlm_probability, PROCESSING_SETTINGS["max_mlm_probability"]))
 
         # Load model and tokenizer
         model, tokenizer = load_model_and_tokenizer(model_name, model_type)
@@ -316,7 +337,7 @@ def process_text(text, model_name, model_type, iterations, mlm_probability):
             )
         else:  # encoder
             avg_perplexity, tokens, token_perplexities = calculate_encoder_perplexity(
-                text, model, tokenizer, mlm_probability, iterations
+                text, model, tokenizer, iterations
             )
 
         # Create visualization
@@ -333,8 +354,6 @@ def process_text(text, model_name, model_type, iterations, mlm_probability):
         **Iterations:** {iterations}
         """
 
-        if model_type == "encoder":
-            summary += f" \n**MLM Probability:** {mlm_probability}"
 
         # Create detailed results table
         df = pd.DataFrame({
@@ -387,17 +406,6 @@ with gr.Blocks(title=UI_SETTINGS["title"], theme=gr.themes.Soft()) as demo:
                 step=1,
                 info="Number of iterations to average over"
             )
-
-            mlm_probability = gr.Slider(
-                label="MLM Probability",
-                minimum=PROCESSING_SETTINGS["min_mlm_probability"],
-                maximum=PROCESSING_SETTINGS["max_mlm_probability"],
-                value=PROCESSING_SETTINGS["default_mlm_probability"],
-                step=0.05,
-                visible=False,
-                info="Probability of masking tokens (encoder models only)"
-            )
-
             analyze_btn = gr.Button("🔍 Analyze Perplexity", variant="primary", size="lg")
 
         with gr.Column(scale=3):
@@ -416,33 +424,29 @@ with gr.Blocks(title=UI_SETTINGS["title"], theme=gr.themes.Soft()) as demo:
     def update_model_choices(model_type):
         return gr.update(choices=DEFAULT_MODELS[model_type], value=DEFAULT_MODELS[model_type][0])
 
-    # Show/hide MLM probability based on model type
-    def toggle_mlm_visibility(model_type):
-        return gr.update(visible=(model_type == "encoder"))
-
     model_type.change(
-        fn=lambda mt: [update_model_choices(mt), toggle_mlm_visibility(mt)],
+        fn=update_model_choices,
         inputs=[model_type],
-        outputs=[model_name, mlm_probability]
+        outputs=[model_name]
     )
 
     # Set up the analysis function
     analyze_btn.click(
         fn=process_text,
-        inputs=[text_input, model_name, model_type, iterations, mlm_probability],
+        inputs=[text_input, model_name, model_type, iterations],
         outputs=[summary_output, viz_output, table_output]
     )
 
     # Add examples
     with gr.Accordion("📝 Example Texts", open=False):
         examples_data = [
-            [ex["text"], ex["model"], ex["type"], ex["iterations"], ex["mlm_prob"]]
+            [ex["text"], ex["model"], ex["type"], ex["iterations"]]
             for ex in UI_SETTINGS["examples"]
         ]
 
         gr.Examples(
             examples=examples_data,
-            inputs=[text_input, model_name, model_type, iterations, mlm_probability],
+            inputs=[text_input, model_name, model_type, iterations],
             outputs=[summary_output, viz_output, table_output],
             fn=process_text,
             cache_examples=False,
@@ -464,6 +468,7 @@ with gr.Blocks(title=UI_SETTINGS["title"], theme=gr.themes.Soft()) as demo:
     - Models are cached after first use
     - Very long texts are truncated to 512 tokens
    - GPU acceleration is used when available
+    - For encoder models, all content tokens are analyzed for comprehensive results
     """)
 
 if __name__ == "__main__":
 
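Side note on the main change above: the removed path masked a random subset of tokens (governed by mlm_probability) per iteration, while the simple viewer now masks every content token exactly once and takes exp(mean negative log-likelihood). A minimal self-contained sketch of that mask-each-token pseudo-perplexity loop, assuming a Hugging Face masked-LM checkpoint; the helper name pseudo_perplexity is ours, not part of the commit:

import math
import torch
import torch.nn.functional as F
from transformers import AutoModelForMaskedLM, AutoTokenizer

def pseudo_perplexity(text, model_name="bert-base-uncased"):
    """Mask each content token once; return exp(mean NLL) over those tokens."""
    tok = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForMaskedLM.from_pretrained(model_name).eval()
    ids = tok(text, return_tensors="pt", truncation=True, max_length=512)["input_ids"]
    special = {tok.cls_token_id, tok.sep_token_id, tok.pad_token_id}
    losses = []
    with torch.no_grad():
        for i in range(ids.size(1)):
            if ids[0, i].item() in special:
                continue  # skip [CLS]/[SEP]/[PAD], as app.py does
            masked = ids.clone()
            masked[0, i] = tok.mask_token_id  # mask exactly one token
            logits = model(masked).logits[0, i]
            prob = F.softmax(logits, dim=-1)[ids[0, i]]
            losses.append(-torch.log(prob + 1e-10).item())  # epsilon guards log(0)
    return math.exp(sum(losses) / len(losses))  # assumes at least one content token

# e.g. pseudo_perplexity("The capital of France is Paris.") should come out low for fluent text.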
config.py ADDED
@@ -0,0 +1,113 @@
+# Configuration file for PerplexityViewer
+
+# Default models for different types
+DEFAULT_MODELS = {
+    "decoder": [
+        "gpt2",
+        "distilgpt2",
+        "microsoft/DialoGPT-small",
+        "microsoft/DialoGPT-medium",
+        "openai-gpt"
+    ],
+    "encoder": [
+        "bert-base-uncased",
+        "bert-base-cased",
+        "distilbert-base-uncased",
+        "roberta-base",
+        "albert-base-v2",
+        "UMCU/CardioMedRoBERTa.nl",
+        "UMCU/CardioBERTa.nl",
+        "UMCU/CardioBERTa.nl_clinical",
+        "CLTL/MedRoBERTa.nl",
+        "DTAI-KULeuven/robbert-2023-dutch-base",
+        "DTAI-KULeuven/robbert-2023-dutch-large"
+    ]
+}
+
+# Model display settings
+MODEL_SETTINGS = {
+    "max_length": 512,
+    "torch_dtype": "float16",
+    "device_map": "auto"
+}
+
+# Visualization settings
+VIZ_SETTINGS = {
+    "max_perplexity_display": 50.0,  # Cap visualization at this perplexity value
+    "color_scheme": {
+        "low_perplexity": {"r": 46, "g": 204, "b": 113},    # Green for low perplexity (confident)
+        "medium_perplexity": {"r": 241, "g": 196, "b": 15},  # Yellow for medium perplexity
+        "high_perplexity": {"r": 231, "g": 76, "b": 60},     # Red for high perplexity (uncertain)
+        "background_alpha": 0.7,  # Background transparency
+        "border_alpha": 0.9       # Border transparency
+    },
+    "thresholds": {
+        "low_threshold": 0.3,   # Below this is low perplexity (green)
+        "high_threshold": 0.7   # Above this is high perplexity (red)
+    },
+    "displacy_options": {
+        "ents": ["PP"],
+        "colors": {}
+    }
+}
+
+# Processing settings
+PROCESSING_SETTINGS = {
+    "default_iterations": 1,
+    "max_iterations": 10,
+    "epsilon": 1e-10  # Small value to avoid log(0)
+}
+
+# UI settings
+UI_SETTINGS = {
+    "theme": "soft",
+    "title": "📈 Perplexity Viewer",
+    "description": """
+    Visualize per-token perplexity using color gradients.
+    - **Red**: High perplexity (model is uncertain)
+    - **Green**: Low perplexity (model is confident)
+
+    Choose between decoder models (like GPT) for true perplexity or encoder models (like BERT) for pseudo-perplexity via MLM.
+    """,
+    "examples": [
+        {
+            "text": "The quick brown fox jumps over the lazy dog.",
+            "model": "gpt2",
+            "type": "decoder",
+            "iterations": 1
+        },
+        {
+            "text": "The capital of France is Paris.",
+            "model": "bert-base-uncased",
+            "type": "encoder",
+            "iterations": 1
+        },
+        {
+            "text": "Quantum entanglement defies classical physics intuition completely.",
+            "model": "distilgpt2",
+            "type": "decoder",
+            "iterations": 1
+        },
+        {
+            "text": "Buffalo buffalo Buffalo buffalo buffalo buffalo Buffalo buffalo.",
+            "model": "gpt2",
+            "type": "decoder",
+            "iterations": 1
+        },
+        {
+            "text": "Machine learning algorithms require computational resources.",
+            "model": "distilbert-base-uncased",
+            "type": "encoder",
+            "iterations": 1
+        }
+    ]
+}
+
+# Error messages
+ERROR_MESSAGES = {
+    "empty_text": "Please enter some text to analyze.",
+    "model_load_error": "Error loading model {model_name}: {error}",
+    "processing_error": "Error processing text: {error}",
+    "no_tokens_masked": "No tokens were masked during MLM processing.",
+    "invalid_model_type": "Invalid model type. Must be 'encoder' or 'decoder'."
+}
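For reference, a small sketch of how the thresholds and color_scheme entries above drive the piecewise color interpolation in app.py's create_visualization; perplexity_to_rgb is our illustrative name, not part of the commit:

from config import VIZ_SETTINGS

def perplexity_to_rgb(norm_perp):
    """Map a normalized perplexity in [0, 1] to an (r, g, b) tuple, mirroring app.py."""
    scheme = VIZ_SETTINGS["color_scheme"]
    low, med, high = scheme["low_perplexity"], scheme["medium_perplexity"], scheme["high_perplexity"]
    lo_t = VIZ_SETTINGS["thresholds"]["low_threshold"]   # 0.3: green band ends here
    hi_t = VIZ_SETTINGS["thresholds"]["high_threshold"]  # 0.7: red band starts here
    if norm_perp < lo_t:        # interpolate green -> yellow
        f, a, b = norm_perp / lo_t, low, med
    elif norm_perp < hi_t:      # interpolate yellow -> red
        f, a, b = (norm_perp - lo_t) / (hi_t - lo_t), med, high
    else:                       # darken red for the highest values
        f = min((norm_perp - hi_t) / (1.0 - hi_t), 1.0)
        d = 0.8 - 0.3 * f
        return tuple(int(high[c] * d) for c in "rgb")
    return tuple(int(a[c] + f * (b[c] - a[c])) for c in "rgb")

# e.g. perplexity_to_rgb(0.0) -> (46, 204, 113); perplexity_to_rgb(0.5) -> a yellow/orange mix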
launch.py ADDED
@@ -0,0 +1,48 @@
+#!/usr/bin/env python3
+"""
+Simple launcher for PerplexityViewer that handles common issues
+"""
+
+import sys
+import os
+
+def main():
+    """Simple launcher with fallback options"""
+    print("🚀 Starting PerplexityViewer...")
+
+    try:
+        # Try importing required modules
+        import gradio as gr
+        print(f"✅ Gradio version: {gr.__version__}")
+
+        # Import the app
+        from app import demo
+
+        # Launch with minimal configuration
+        print("🌐 Launching app at http://localhost:7860")
+        demo.launch()
+
+    except ImportError as e:
+        print(f"❌ Missing dependency: {e}")
+        print("💡 Install requirements with: pip install -r requirements.txt")
+        sys.exit(1)
+
+    except Exception as e:
+        print(f"❌ Launch failed: {e}")
+        print("💡 Trying alternative methods...")
+
+        # Try different launch approaches
+        try:
+            from app import demo
+            demo.launch(server_name="127.0.0.1", server_port=7860)
+        except Exception:
+            try:
+                from app import demo
+                demo.launch(share=False, debug=True)
+            except Exception:
+                print("❌ All launch methods failed")
+                print("💡 Try running: python app.py directly")
+                sys.exit(1)
+
+if __name__ == "__main__":
+    main()
run.py ADDED
@@ -0,0 +1,181 @@
+#!/usr/bin/env python3
+"""
+Startup script for PerplexityViewer Gradio app
+"""
+
+import os
+import sys
+import subprocess
+import argparse
+import warnings
+
+# Suppress warnings
+warnings.filterwarnings("ignore")
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+
+def check_dependencies():
+    """Check if required packages are installed"""
+    required_packages = [
+        "torch",
+        "transformers",
+        "gradio",
+        "pandas",
+        "spacy",
+        "numpy"
+    ]
+
+    missing_packages = []
+
+    for package in required_packages:
+        try:
+            __import__(package)
+        except ImportError:
+            missing_packages.append(package)
+
+    if missing_packages:
+        print("❌ Missing required packages:")
+        for package in missing_packages:
+            print(f"   - {package}")
+        print("\n📦 Install missing packages with:")
+        print(f"   pip install {' '.join(missing_packages)}")
+        return False
+
+    print("✅ All required packages are installed")
+    return True
+
+def install_dependencies():
+    """Install dependencies from requirements.txt"""
+    print("📦 Installing dependencies...")
+    try:
+        subprocess.check_call([sys.executable, "-m", "pip", "install", "-r", "requirements.txt"])
+        print("✅ Dependencies installed successfully")
+        return True
+    except subprocess.CalledProcessError as e:
+        print(f"❌ Failed to install dependencies: {e}")
+        return False
+
+def run_tests():
+    """Run the test suite"""
+    print("🧪 Running tests...")
+    try:
+        result = subprocess.run([sys.executable, "test_app.py"],
+                                capture_output=True, text=True)
+
+        if result.returncode == 0:
+            print("✅ All tests passed")
+            return True
+        else:
+            print("❌ Some tests failed:")
+            print(result.stdout)
+            print(result.stderr)
+            return False
+    except FileNotFoundError:
+        print("⚠️ Test file not found, skipping tests")
+        return True
+
+def launch_app(share=False, debug=False, port=7860):
+    """Launch the Gradio app"""
+    print("🚀 Starting PerplexityViewer...")
+
+    # Set environment variables
+    if debug:
+        os.environ["GRADIO_DEBUG"] = "1"
+
+    try:
+        # Import and launch the app
+        from app import demo
+
+        # Prepare launch arguments with version compatibility
+        launch_args = {
+            "server_name": "0.0.0.0" if not debug else "127.0.0.1",
+            "server_port": port,
+            "share": share,
+            "show_api": False
+        }
+
+        # Add quiet parameter only if supported (older Gradio versions)
+        try:
+            import gradio as gr
+            # Check if quiet parameter is supported
+            import inspect
+            launch_signature = inspect.signature(demo.launch)
+            if 'quiet' in launch_signature.parameters:
+                launch_args["quiet"] = not debug
+        except Exception:
+            pass  # If we can't check, just skip the quiet parameter
+
+        demo.launch(**launch_args)
+
+    except KeyboardInterrupt:
+        print("\n👋 Shutting down PerplexityViewer")
+    except Exception as e:
+        print(f"❌ Failed to launch app: {e}")
+        print("💡 Try updating Gradio: pip install --upgrade gradio")
+        sys.exit(1)
+
+def main():
+    """Main entry point"""
+    parser = argparse.ArgumentParser(
+        description="PerplexityViewer - Visualize text perplexity with color gradients",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  python run.py                      # Launch with default settings
+  python run.py --install            # Install dependencies first
+  python run.py --test               # Run tests before launching
+  python run.py --share              # Create shareable link
+  python run.py --debug --port 8080  # Debug mode on custom port
+        """
+    )
+
+    parser.add_argument("--install", action="store_true",
+                        help="Install dependencies before launching")
+    parser.add_argument("--test", action="store_true",
+                        help="Run tests before launching")
+    parser.add_argument("--share", action="store_true",
+                        help="Create a shareable Gradio link")
+    parser.add_argument("--debug", action="store_true",
+                        help="Enable debug mode")
+    parser.add_argument("--port", type=int, default=7860,
+                        help="Port to run the server on (default: 7860)")
+    parser.add_argument("--skip-checks", action="store_true",
+                        help="Skip dependency checks")
+
+    args = parser.parse_args()
+
+    print("="*60)
+    print("🎯 PerplexityViewer Startup")
+    print("="*60)
+
+    # Install dependencies if requested
+    if args.install:
+        if not install_dependencies():
+            sys.exit(1)
+
+    # Check dependencies unless skipped
+    if not args.skip_checks:
+        if not check_dependencies():
+            print("\n💡 Try running with --install to install missing packages")
+            sys.exit(1)
+
+    # Run tests if requested
+    if args.test:
+        if not run_tests():
+            print("\n⚠️ Tests failed, but continuing anyway...")
+            print("   Use Ctrl+C to cancel or wait to launch app")
+            try:
+                import time
+                time.sleep(3)
+            except KeyboardInterrupt:
+                print("\n👋 Cancelled")
+                sys.exit(0)
+
+    # Launch the app
+    print(f"\n🌐 App will be available at: http://localhost:{args.port}")
+    if args.share:
+        print("🔗 A shareable link will be created")
+
+    launch_app(share=args.share, debug=args.debug, port=args.port)
+
+if __name__ == "__main__":
+    main()