# NOTE(review): this file was recovered from a Hugging Face Spaces page scrape;
# the page header ("Spaces:" / "Build error" x2) was converted to this comment.
import json
from difflib import SequenceMatcher

import gradio as gr
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from visualization.ngram_visualizer import create_ngram_visualization
from visualization.topic_visualizer import process_and_visualize_topic_analysis
def create_bow_visualization(analysis_results):
    """
    Create visualizations for bag-of-words analysis results.

    Args:
        analysis_results (dict | str): Analysis results from the BoW analysis,
            either as a dict or a JSON-encoded string.

    Returns:
        list: Gradio components with the visualizations. Always returns at
            least one Markdown component, even on parse failure or empty input.
    """
    # Accept either a JSON string or an already-parsed dict.
    if isinstance(analysis_results, str):
        try:
            results = json.loads(analysis_results)
        except json.JSONDecodeError:
            return [gr.Markdown("Error parsing analysis results.")]
    else:
        results = analysis_results

    if not results or "analyses" not in results:
        return [gr.Markdown("No analysis results found.")]

    output_components = []
    # Process each prompt in the analysis results.
    for prompt, analyses in results["analyses"].items():
        output_components.append(gr.Markdown(f"## Analysis of Prompt: \"{prompt}\""))

        if "bag_of_words" not in analyses:
            continue
        bow_results = analyses["bag_of_words"]

        # Show which models are being compared (only meaningful for >= 2).
        models = bow_results.get("models", [])
        if len(models) >= 2:
            output_components.append(
                gr.Markdown(f"### Comparing responses from {models[0]} and {models[1]}")
            )

        # One bar chart of the most frequent words per model.
        important_words = bow_results.get("important_words", {})
        for model_name, words in important_words.items():
            df = pd.DataFrame(words)
            fig = px.bar(df, x='word', y='count',
                         title=f"Top Words Used by {model_name}",
                         labels={'word': 'Word', 'count': 'Frequency'},
                         height=400)
            fig.update_layout(
                xaxis_title="Word",
                yaxis_title="Frequency",
                xaxis={'categoryorder': 'total descending'}
            )
            output_components.append(gr.Plot(value=fig))

        # Grouped bar chart of words with the biggest frequency differences.
        diff_words = bow_results.get("differential_words", [])
        word_matrix = bow_results.get("word_count_matrix", {})
        # BUGFIX: also require len(models) >= 2 — the original unpacked
        # models[0]/models[1] unconditionally here and raised IndexError
        # whenever fewer than two models were present.
        if diff_words and word_matrix and len(models) >= 2:
            output_components.append(gr.Markdown("### Words with Biggest Frequency Differences"))
            model1, model2 = models[0], models[1]
            diff_data = []
            for word in diff_words[:15]:  # limit to top 15 for readability
                if word in word_matrix:
                    counts = word_matrix[word]
                    diff_data.append({
                        "word": word,
                        model1: counts.get(model1, 0),
                        model2: counts.get(model2, 0)
                    })
            if diff_data:
                diff_df = pd.DataFrame(diff_data)
                fig = go.Figure()
                fig.add_trace(go.Bar(
                    x=diff_df['word'],
                    y=diff_df[model1],
                    name=model1,
                    marker_color='indianred'
                ))
                fig.add_trace(go.Bar(
                    x=diff_df['word'],
                    y=diff_df[model2],
                    name=model2,
                    marker_color='lightsalmon'
                ))
                fig.update_layout(
                    title="Word Frequency Comparison",
                    xaxis_title="Word",
                    yaxis_title="Frequency",
                    barmode='group',
                    height=500
                )
                output_components.append(gr.Plot(value=fig))

    # Only the prompt headers (or nothing) were added: tell the user.
    if len(output_components) <= 1:
        output_components.append(gr.Markdown("No detailed Bag of Words analysis found in results."))
    return output_components
def process_and_visualize_analysis(analysis_results):
    """
    Process the analysis results and create visualization components.

    Args:
        analysis_results (dict | str): The analysis results, either as a dict
            or a JSON-encoded string.

    Returns:
        list: Gradio components for visualization; on failure, a single
            Markdown component describing the error.
    """
    try:
        print(f"Starting visualization of analysis results: {type(analysis_results)}")
        # BUGFIX: accept a JSON string, consistent with create_bow_visualization.
        # Previously a string fell through to `"analyses" not in analysis_results`,
        # which on a str is a substring test, not a key test.
        if isinstance(analysis_results, str):
            try:
                analysis_results = json.loads(analysis_results)
            except json.JSONDecodeError:
                return [gr.Markdown("No analysis results to visualize.")]
        components = []
        if not analysis_results or "analyses" not in analysis_results:
            print("Warning: Empty or invalid analysis results")
            components.append(gr.Markdown("No analysis results to visualize."))
            return components

        # For each prompt in the analysis results
        for prompt, analyses in analysis_results.get("analyses", {}).items():
            print(f"Visualizing results for prompt: {prompt[:30]}...")
            components.append(gr.Markdown(f"## Analysis for Prompt:\n\"{prompt}\""))

            # Bag of Words analysis (textual summary, no plots).
            if "bag_of_words" in analyses:
                print("Processing Bag of Words visualization")
                components.append(gr.Markdown("### Bag of Words Analysis"))
                bow_results = analyses["bag_of_words"]
                if "models" in bow_results:
                    models = bow_results["models"]
                    components.append(gr.Markdown(f"**Models compared**: {', '.join(models)}"))
                if "important_words" in bow_results:
                    components.append(gr.Markdown("#### Most Common Words by Model"))
                    for model, words in bow_results["important_words"].items():
                        print(f"Creating word list for model {model}")
                        word_list = [f"{item['word']} ({item['count']})" for item in words[:10]]
                        components.append(gr.Markdown(f"**{model}**: {', '.join(word_list)}"))
                # Word frequency differences between the first two models.
                # BUGFIX: use .get("models", []) — the original indexed
                # bow_results["models"] inside a condition that never
                # verified the "models" key exists (KeyError risk).
                models = bow_results.get("models", [])
                if ("differential_words" in bow_results
                        and "word_count_matrix" in bow_results
                        and len(models) >= 2):
                    diff_words = bow_results["differential_words"]
                    word_matrix = bow_results["word_count_matrix"]
                    if diff_words and word_matrix:
                        components.append(gr.Markdown("### Words with Biggest Frequency Differences"))
                        model1, model2 = models[0], models[1]
                        for word in diff_words[:10]:  # limit to top 10 for readability
                            if word in word_matrix:
                                counts = word_matrix[word]
                                model1_count = counts.get(model1, 0)
                                model2_count = counts.get(model2, 0)
                                # Only include if there's a meaningful difference
                                if abs(model1_count - model2_count) > 0:
                                    components.append(gr.Markdown(
                                        f"- **{word}**: {model1}: {model1_count}, {model2}: {model2_count}"
                                    ))

            # N-gram analysis: delegate to the dedicated visualizer.
            if "ngram_analysis" in analyses:
                print("Processing N-gram visualization")
                ngram_components = create_ngram_visualization(
                    {"analyses": {prompt: {"ngram_analysis": analyses["ngram_analysis"]}}})
                components.extend(ngram_components)

            # Topic modeling: delegate to the dedicated visualizer.
            if "topic_modeling" in analyses:
                print("Processing Topic Modeling visualization")
                topic_components = process_and_visualize_topic_analysis(
                    {"analyses": {prompt: {"topic_modeling": analyses["topic_modeling"]}}})
                components.extend(topic_components)

        if not components:
            components.append(gr.Markdown("No visualization components could be created from the analysis results."))
        print(f"Visualization complete: generated {len(components)} components")
        return components
    except Exception as e:
        # Boundary handler: report the failure to the UI instead of crashing it.
        import traceback
        error_msg = f"Visualization error: {str(e)}\n{traceback.format_exc()}"
        print(error_msg)
        return [gr.Markdown(f"**Error during visualization:**\n\n```\n{error_msg}\n```")]