Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
| from collections import Counter | |
| import matplotlib.ticker as ticker | |
| import gradio as gr | |
def category_chart(file_path):
    """Build a bar chart of how often each topic occurs in an Excel sheet.

    A cell of the 'Topic' column may list several topics separated by
    commas; every listed topic is counted once per occurrence.

    Args:
        file_path: Excel file location, passed straight to
            ``pandas.read_excel``.

    Returns:
        The matplotlib ``Figure`` holding the bar chart, with bars ordered
        by descending count.

    Raises:
        ValueError: If the 'Topic' column is missing, entirely empty, or
            contains no non-blank topic.
    """
    df = pd.read_excel(file_path)

    if 'Topic' not in df.columns or df['Topic'].isnull().all():
        raise ValueError("The 'Topic' column is missing or empty.")

    # astype(str) keeps a numeric cell from breaking the split below.
    topic_cells = df['Topic'].dropna().astype(str)

    # Strip *before* filtering so whitespace-only fragments (e.g. from a
    # trailing comma) are discarded instead of being counted as topic ''.
    topic_counts = Counter(
        topic
        for cell in topic_cells
        for topic in (fragment.strip() for fragment in cell.split(','))
        if topic
    )
    if not topic_counts:
        raise ValueError("The 'Topic' column is missing or empty.")

    topic_counts_df = pd.DataFrame(
        list(topic_counts.items()), columns=['Topic', 'Count']
    ).sort_values('Count', ascending=False)

    # Start from a clean pyplot state so figures from earlier calls
    # are released.
    plt.close('all')
    fig, ax = plt.subplots(figsize=(14, 7))
    ax.set_facecolor('#222c52')
    fig.patch.set_facecolor('#222c52')

    # Alternate the two accent colors from bar to bar.
    colors = ['#08F7FE' if i % 2 == 0 else '#FE53BB' for i in range(len(topic_counts_df))]
    topic_counts_df.plot(kind='bar', x='Topic', y='Count', ax=ax, color=colors, edgecolor=colors,
                         alpha=0.7, linewidth=2, legend=None)

    ax.xaxis.label.set_color('white')
    ax.yaxis.label.set_color('white')
    ax.tick_params(axis='x', colors='white', labelsize=10, direction='out', length=6, width=2, rotation=45)
    ax.tick_params(axis='y', colors='white', labelsize=10, direction='out', length=6, width=2)
    ax.set_title('Topic Frequency Distribution', color='white', fontsize=16)
    ax.set_xlabel('Topic', fontsize=14)
    ax.set_ylabel('Count', fontsize=14)
    ax.grid(True, which='both', axis='y', color='gray', linestyle='-', linewidth=0.5, alpha=0.5)
    ax.set_axisbelow(True)  # keep the grid behind the bars

    for spine in ax.spines.values():
        spine.set_color('white')
        spine.set_linewidth(1)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    return fig
def status_chart(file_path):
    """Draw a pie chart of how the rows split across 'Status' values.

    Args:
        file_path: Excel file location, passed straight to
            ``pandas.read_excel``; the sheet is read for its 'Status' column.

    Returns:
        The matplotlib ``Figure`` holding the pie chart and its legend.
    """
    # Release figures left over from earlier calls before reading the sheet.
    plt.close('all')
    sheet = pd.read_excel(file_path)
    counts_by_status = sheet['Status'].value_counts()

    background = '#222c52'
    # RGBA hex strings: the last two hex digits are the alpha channel
    # ('80' is roughly 50% opacity; 'de' and 'bf' are more opaque).
    palette = ['#08F7FE80', '#FE53BB80',
               '#fff236de', '#90ff00bf']

    fig, ax = plt.subplots()
    fig.patch.set_facecolor(background)
    ax.set_facecolor(background)

    slice_border = dict(edgecolor='white', linewidth=1.5)
    wedges, texts, autotexts = ax.pie(counts_by_status, autopct='%1.1f%%', startangle=90,
                                      colors=palette, wedgeprops=slice_border)

    # The legend sits to the right of the pie and carries the status names.
    ax.legend(wedges, counts_by_status.index, title="Document Status", loc="center left",
              bbox_to_anchor=(1, 0, 0.5, 1))
    ax.set_ylabel('')
    ax.set_title('Document Status Distribution', color='white')
    plt.setp(autotexts, size=8, weight="bold", color="white")
    return fig
def plot_glowing_line_with_dots_enhanced(ax, x, y, color, label, glow_size=10, base_linewidth=3, markersize=8):
    """Plot a line with round markers on top of a stack of faint, wider copies.

    Args:
        ax: Object exposing a matplotlib-style ``plot`` method.
        x: X values handed to every ``ax.plot`` call.
        y: Y values handed to every ``ax.plot`` call.
        color: Color used for both the glow layers and the main line.
        label: Legend label attached to the main line only.
        glow_size: Number of translucent glow layers drawn under the line.
        base_linewidth: Width of the main line; glow layer ``k`` is drawn
            with width ``base_linewidth * k * 0.5``.
        markersize: Size of the round markers on the main line.
    """
    for layer in range(1, glow_size + 1):
        # Opacity rises with the layer number and is capped at 1.0 before
        # being scaled down to a tenth.
        layer_alpha = min(1.0, (1.0 / glow_size) * (layer / (glow_size / 2)))
        layer_width = base_linewidth * layer * 0.5
        ax.plot(x, y, color=color, linewidth=layer_width, alpha=layer_alpha * 0.1)

    # The main line goes last so it is drawn over the glow layers.
    ax.plot(x, y, color=color, linewidth=base_linewidth, marker='o', linestyle='-',
            label=label, markersize=markersize)
def company_document_type(file_path, company_names):
    """Plot, for each requested company, its document count per document type.

    One glowing line (with a faint filled area underneath) is drawn per
    company. All companies share the same x axis: every non-null value of
    the sheet's 'Type' column.

    Args:
        file_path: Excel file location, passed straight to
            ``pandas.read_excel``; the 'Source' and 'Type' columns are read.
        company_names: Either a list of names or a single comma-separated
            string. A row belongs to a company when the name occurs in its
            'Source' cell as a case-insensitive literal substring. Blank
            names are ignored.

    Returns:
        The matplotlib ``Figure`` holding the chart. With no usable company
        name the styled axes are returned without any line.
    """
    plt.close('all')
    if isinstance(company_names, str):
        company_names = company_names.split(',')
    # Drop blanks (e.g. from a trailing comma): an empty name would match
    # every row of the sheet.
    company_names = [name.strip() for name in (company_names or []) if name and name.strip()]

    df = pd.read_excel(file_path)
    # Computed once, outside the loop, so the axis setup below also works
    # when no company was requested. NaN is dropped because it cannot serve
    # as a category on the x axis.
    all_document_types = df['Type'].dropna().unique()

    fig, ax = plt.subplots(figsize=(14, 8))
    ax.set_facecolor('#222c52')
    fig.patch.set_facecolor('#222c52')
    colors = ['#08F7FE', '#FE53BB', '#fff236']  # cycled when there are more companies

    max_count = 0
    for index, company_name in enumerate(company_names):
        color = colors[index % len(colors)]
        # regex=False: names such as "OPPO (chongqing)" contain regex
        # metacharacters and must be matched literally.
        belongs = df['Source'].str.contains(company_name, case=False, na=False, regex=False)
        document_counts = df.loc[belongs, 'Type'].value_counts()
        # Align on the shared type list so absent types show up as 0.
        document_counts = document_counts.reindex(all_document_types, fill_value=0)
        x_data = document_counts.index
        y_data = document_counts.values

        ax.fill_between(x_data, y_data, -0.2, color=color, alpha=0.1)
        plot_glowing_line_with_dots_enhanced(ax, x_data, y_data, color, company_name, base_linewidth=4)
        max_count = max(max_count, max(y_data, default=0))

    ax.set_xticks(range(len(all_document_types)))
    ax.set_xticklabels(all_document_types, rotation=45, fontsize=12, fontweight='bold', color='white')
    ax.yaxis.set_major_locator(ticker.MaxNLocator(integer=True))
    ax.set_ylabel('Count', color='white')
    ax.set_title('Document Types Contributed by Companies', color='white')
    ax.grid(True, which='both', axis='both', color='gray', linestyle='-', linewidth=0.5, alpha=0.5)
    ax.set_axisbelow(True)
    # Act on this Axes explicitly rather than on pyplot's "current" axes.
    ax.set_ylim(-0.2, max_count + 1)

    for spine in ax.spines.values():
        spine.set_color('white')
        spine.set_linewidth(2)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.spines['left'].set_position(('data', 0))

    if company_names:  # a legend without entries only triggers a warning
        ax.legend(facecolor='#222c52', edgecolor='white', fontsize=12, labelcolor='white')
    return fig
def get_expert(file_path):
    """Collect the distinct expert names listed in the sheet's 'Expert' column.

    A cell may list several experts separated by commas; names are stripped
    of surrounding whitespace and blank fragments are discarded.

    Args:
        file_path: Excel file location, passed straight to
            ``pandas.read_excel``.

    Returns:
        The result of ``gr.update(choices=...)`` carrying the sorted list of
        unique expert names.

    Raises:
        ValueError: If the sheet has no 'Expert' column.
    """
    df = pd.read_excel(file_path)

    if 'Expert' not in df.columns:
        raise ValueError("The 'Expert' column is missing from the provided file.")

    unique_experts = {
        name
        # str() keeps a non-text cell from crashing the split.
        for cell in df['Expert'].dropna().unique()
        for name in (fragment.strip() for fragment in str(cell).split(','))
        # Skip blanks produced by trailing or doubled commas so the choice
        # list never contains an empty entry.
        if name
    }
    return gr.update(choices=sorted(unique_experts))
def chart_by_expert(file_path, expert_name):
    """Plot the ten companies most often listed as source on an expert's rows.

    Args:
        file_path: Excel file location, passed straight to
            ``pandas.read_excel``; the 'Expert' and 'Source' columns are read.
        expert_name: Expert to filter on. When it contains a '/', only the
            part between the first and second slash is used for matching.
            Matching is a case-insensitive literal substring test against
            each expert listed in a row.

    Returns:
        The matplotlib ``Figure`` holding the bar chart. When the expert has
        no source at all, the styled axes are returned without bars.

    Raises:
        ValueError: If the name used for matching is missing or blank.
    """
    plt.close('all')
    data = pd.read_excel(file_path)

    expert_name = expert_name or ''
    parts = expert_name.split('/')
    name = parts[1].strip() if len(parts) > 1 else expert_name.strip()
    if not name:
        # A blank pattern would match every row and chart the whole sheet
        # under a misleading title.
        raise ValueError("An expert name is required.")

    def normalize_companies(company_list, merge_entities):
        # Map each company to its canonical name; the set removes duplicates
        # that arise inside a single cell after merging.
        normalized = set()
        for company in company_list:
            normalized_name = merge_entities.get(company.strip(), company.strip())
            normalized.add(normalized_name)
        return list(normalized)

    # Same alias table as generate_company_chart, so both charts merge
    # company variants identically.
    merge_entities = {
        "Nokia Shanghai Bell": "Nokia",
        "Qualcomm Korea": "Qualcomm",
        "Qualcomm Incorporated": "Qualcomm",
        "Huawei Technologies R&D UK": "Huawei",
        "Hughes Network Systems": "Hughes",
        "HUGHES Network Systems": "Hughes",
        "Hughes Network systems": "Hughes",
        "HUGHES Network Systems Ltd": "Hughes",
        "KT Corp.": "KT Corporation",
        "Deutsche Telekom AG": "Deutsche Telekom",
        "LG Electronics Inc.": "LG Electronics",
        "LG Uplus": "LG Electronics",
        "OPPO (chongqing) Intelligence": "OPPO",
        "Samsung Electronics GmbH": "Samsung",
        "China Mobile International Ltd": "China Mobile",
        "NOVAMINT": "Novamint",
        "Eutelsat": "Eutelsat Group",
        "Inmarsat Viasat": "Inmarsat",
        "China Telecommunications": "China Telecom",
        "SES S.A.": "SES",
        "Ericsson GmbH": "Ericsson",
        "JSAT": "SKY Perfect JSAT",
        "NEC Europe Ltd": "NEC",
        "Fraunhofer IIS": "Fraunhofer",
        "Hugues Network Systems": "Hughes",
    }

    # One row per (document, expert) pair, so a document with several
    # experts can be matched on any of them.
    data['ExpertsList'] = data['Expert'].dropna().apply(
        lambda cell: [expert.strip() for expert in str(cell).split(',')]
    )
    data_exploded = data.explode('ExpertsList')

    # regex=False: a name containing '(', '.', '+', ... must match literally.
    is_expert = data_exploded['ExpertsList'].str.contains(name, case=False, na=False, regex=False)
    filtered_data = data_exploded[is_expert]

    sources = filtered_data['Source'].dropna()
    split_sources = sources.apply(lambda cell: normalize_companies(str(cell).split(', '), merge_entities))
    all_sources = [company for sublist in split_sources for company in sublist]

    source_counts = Counter(all_sources)
    top_10_sources = source_counts.most_common(10)
    top_10_df = pd.DataFrame(top_10_sources, columns=['Company', 'Count'])

    fig, ax = plt.subplots(figsize=(14, 11))
    ax.set_facecolor('#222c52')
    fig.patch.set_facecolor('#222c52')

    # pandas refuses to plot an empty frame, so only draw bars when the
    # expert has at least one source.
    if not top_10_df.empty:
        colors = ['#08F7FE' if i % 2 == 0 else '#FE53BB' for i in range(len(top_10_df))]
        # legend=None matches the other bar charts in this file.
        top_10_df.plot(kind='bar', x='Company', y='Count', ax=ax, color=colors, edgecolor=colors,
                       alpha=0.5, linewidth=5, legend=None)

    ax.xaxis.label.set_color('white')
    ax.yaxis.label.set_color('white')
    ax.tick_params(axis='x', colors='white', labelsize=12, direction='out', length=6, width=2, rotation=45)
    ax.tick_params(axis='y', colors='white', labelsize=12, direction='out', length=6, width=2)
    ax.set_title(f"Top 10 Contributors for Expert '{expert_name}'", color='white', fontsize=16)
    ax.set_xlabel('Company', fontsize=14)
    ax.set_ylabel('Count', fontsize=14)
    ax.yaxis.set_major_locator(ticker.MaxNLocator(integer=True))
    ax.grid(True, which='both', axis='y', color='gray', linestyle='-', linewidth=0.5, alpha=0.5)
    ax.set_axisbelow(True)

    for spine in ax.spines.values():
        spine.set_color('white')
        spine.set_linewidth(2)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    return fig
# @title Top 10 companies by number of publications
def generate_company_chart(file_path):
    """Plot the ten companies most often listed in the sheet's 'Source' column.

    Each 'Source' cell is split on ', ', company variants are merged through
    an alias table, and a company is counted at most once per cell.

    Args:
        file_path: Excel file location, passed straight to
            ``pandas.read_excel``.

    Returns:
        The matplotlib ``Figure`` holding the bar chart.
    """
    # NOTE(review): unlike the other chart builders in this file, open
    # figures are not closed here (the plt.close('all') call had been left
    # commented out) - confirm that this is intended.

    # Variant spelling -> canonical company name.
    aliases = {
        "Nokia Shanghai Bell": "Nokia",
        "Qualcomm Korea": "Qualcomm",
        "Qualcomm Incorporated": "Qualcomm",
        "Huawei Technologies R&D UK": "Huawei",
        "Hughes Network Systems": "Hughes",
        "HUGHES Network Systems": "Hughes",
        "Hughes Network systems": "Hughes",
        "HUGHES Network Systems Ltd": "Hughes",
        "KT Corp.": "KT Corporation",
        "Deutsche Telekom AG": "Deutsche Telekom",
        "LG Electronics Inc.": "LG Electronics",
        "LG Uplus": "LG Electronics",
        "OPPO (chongqing) Intelligence": "OPPO",
        "Samsung Electronics GmbH": "Samsung",
        "China Mobile International Ltd": "China Mobile",
        "NOVAMINT": "Novamint",
        "Eutelsat": "Eutelsat Group",
        "Inmarsat Viasat": "Inmarsat",
        "China Telecommunications": "China Telecom",
        "SES S.A.": "SES",
        "Ericsson GmbH": "Ericsson",
        "JSAT": "SKY Perfect JSAT",
        "NEC Europe Ltd": "NEC",
        "Fraunhofer IIS": "Fraunhofer",
        "Hugues Network Systems": "Hughes"
    }

    def canonical_names(cell):
        # The set keeps a company from being counted twice for one cell
        # once its variants have been merged.
        merged = set()
        for raw_name in cell.split(', '):
            trimmed = raw_name.strip()
            merged.add(aliases.get(trimmed, trimmed))
        return list(merged)

    sheet = pd.read_excel(file_path)
    companies_per_cell = sheet['Source'].dropna().apply(canonical_names)

    # Tally cell by cell, in sheet order.
    tally = Counter()
    for cell_companies in companies_per_cell:
        tally.update(cell_companies)

    top_10_df = pd.DataFrame(tally.most_common(10), columns=['Company', 'Count'])

    background = '#222c52'
    fig, ax = plt.subplots(figsize=(14, 12))
    ax.set_facecolor(background)
    fig.patch.set_facecolor(background)

    # Alternate the two accent colors from bar to bar.
    bar_colors = []
    for position in range(len(top_10_df)):
        bar_colors.append('#08F7FE' if position % 2 == 0 else '#FE53BB')
    top_10_df.plot(kind='bar', x='Company', y='Count', ax=ax, color=bar_colors, edgecolor=bar_colors,
                   alpha=0.5, linewidth=5, legend=None)

    for axis_label in (ax.xaxis.label, ax.yaxis.label):
        axis_label.set_color('white')
    ax.tick_params(axis='x', colors='white', labelsize=16, direction='out', length=6, width=2, rotation=37)
    ax.tick_params(axis='y', colors='white', labelsize=12, direction='out', length=6, width=2)
    ax.set_title('Top 10 Contributors: Ranking Company Contributions', color='white', fontsize=16)
    ax.set_xlabel('Company', fontsize=14)
    ax.set_ylabel('Count', fontsize=14)
    ax.grid(True, which='both', axis='y', color='gray', linestyle='-', linewidth=0.5, alpha=0.5)
    ax.set_axisbelow(True)  # keep the grid behind the bars

    # All spines are styled first; the right and top ones are then hidden.
    for side, spine in ax.spines.items():
        spine.set_color('white')
        spine.set_linewidth(2)
    for hidden_side in ('right', 'top'):
        ax.spines[hidden_side].set_visible(False)
    return fig