Spaces:

Almaatla
/

Standard_Intelligence_Dev

Sleeping

App Files Files Community

MaksG commited on Mar 12, 2024

Commit

779436a

verified ·

1 Parent(s): 7e6ba94

Update charts_advanced.py

Browse files

Files changed (1) hide show

charts_advanced.py +65 -83

charts_advanced.py CHANGED Viewed

@@ -4,63 +4,48 @@ from collections import Counter
 import matplotlib.ticker as ticker
 def category_chart(file_path):
-    plt.close('all')
-    # Define expert to specialty mapping
-    expert_specialties = {
-        "mireille": "Security Trust",
-        "khawla": "Network Security",
-        "guillaume": "Distributed Networks",
-        "vincent": "USIM Management",
-        "pierre": "Eco-Design",
-        "ly-thanh": "Trend Analysis",
-        "nicolas": "Satellite Networks",
-        "dorin": "Emergency Communication"
-    }
     # Load the Excel file
-    data = pd.read_excel(file_path)
-    # Assuming experts are listed in a column named 'Experts'
-    # This part might need to be adjusted based on the actual structure of your Excel file
-    experts = data['Expert'].dropna()
-    # Map experts to their specialties
-    specialties = experts.apply(lambda expert: expert_specialties.get(expert.strip(), "Other"))
-    # Count occurrences
-    specialty_counts = specialties.value_counts()
     # Convert to DataFrame for plotting
-    specialty_counts_df = specialty_counts.reset_index()
-    specialty_counts_df.columns = ['Specialty', 'Count']
     # Plotting
-    fig, ax = plt.subplots(figsize=(14, 14))
     ax.set_facecolor('#222c52')
     fig.patch.set_facecolor('#222c52')
-    # Alternating colors for the bars
-    colors = ['#08F7FE' if i % 2 == 0 else '#FE53BB' for i in range(len(specialty_counts_df))]
-    specialty_counts_df.plot(kind='bar', x='Specialty', y='Count', ax=ax, color=colors, edgecolor=colors, alpha=0.5, linewidth=5, legend=None)
-    # Set chart details
     ax.xaxis.label.set_color('white')
     ax.yaxis.label.set_color('white')
-    ax.tick_params(axis='x', colors='white', labelsize=12, direction='out', length=6, width=2, rotation=42)
-    ax.tick_params(axis='y', colors='white', labelsize=12, direction='out', length=6, width=2)
-    ax.set_title('Most Used Expert Specialties', color='white', fontsize=16)
-    ax.set_xlabel('Specialty', fontsize=14)
     ax.set_ylabel('Count', fontsize=14)
     ax.grid(True, which='both', axis='y', color='gray', linestyle='-', linewidth=0.5, alpha=0.5)
     ax.set_axisbelow(True)
     for spine in ax.spines.values():
         spine.set_color('white')
-        spine.set_linewidth(2)
     ax.spines['right'].set_visible(False)
     ax.spines['top'].set_visible(False)
     return fig
@@ -108,17 +93,16 @@ def plot_glowing_line_with_dots_enhanced(ax, x, y, color, label, glow_size=10, b
 def company_document_type(file_path, company_names):
     plt.close('all')
-    # Convert company_names to a list if it's a string
     if isinstance(company_names, str):
-        company_names = [name.strip() for name in company_names.split(',')]  # Ensure it's a list even for single company name
     df = pd.read_excel(file_path)
     fig, ax = plt.subplots(figsize=(14, 8))
     ax.set_facecolor('#222c52')
     fig.patch.set_facecolor('#222c52')
-    colors = ['#08F7FE', '#FE53BB', '#fff236']  # Assign more colors for more companies
     max_count = 0
     for index, company_name in enumerate(company_names):
@@ -136,10 +120,10 @@ def company_document_type(file_path, company_names):
             max_count = max(y_data)
     ax.set_xticks(range(len(all_document_types)))
-    ax.set_xticklabels(all_document_types, rotation=45, fontsize=12, fontweight='bold')
     ax.yaxis.set_major_locator(ticker.MaxNLocator(integer=True))
     ax.set_ylabel('Count', color='white')
-    ax.set_title('Document Types Contributed by Companies')
     ax.grid(True, which='both', axis='both', color='gray', linestyle='-', linewidth=0.5, alpha=0.5)
     ax.set_axisbelow(True)
@@ -152,68 +136,66 @@ def company_document_type(file_path, company_names):
     ax.spines['right'].set_visible(False)
     ax.spines['top'].set_visible(False)
     ax.spines['left'].set_position(('data', 0))
-    plt.legend(facecolor='#222c52', edgecolor='white', fontsize=12)
     return fig
 def chart_by_expert(file_path, expert_name):
     plt.close('all')
     # Load the Excel file
     data = pd.read_excel(file_path)
     parts = expert_name.split('/')
-# The name would be the second part, trim spaces
-    name = parts[1].strip()
-    # Filter data for the specified expert
-    filtered_data = data[data['Expert'] == name.lower()]
-    # Define merge entities mapping
     merge_entities = {
         "Nokia Shanghai Bell": "Nokia",
         "Qualcomm Korea": "Qualcomm",
-        "Qualcomm Incorporated": "Qualcomm",
-        "Huawei Technologies R&D UK": "Huawei",
-        "Hughes Network Systems": "Hughes",
-        "HUGHES Network Systems": "Hughes",
-        "Hughes Network systems": "Hughes",
-        "HUGHES Network Systems Ltd": "Hughes",
-        "KT Corp.": "KT Corporation",
-        "LG Electronics Inc.": "LG Electronics",
-        "LG Uplus": "LG Electronics",
-        "OPPO (chongqing) Intelligence": "OPPO",
-        "Samsung Electronics GmbH": "Samsung",
-        "China Mobile International Ltd": "China Mobile",
-        "NOVAMINT": "Novamint",
-        "Eutelsat": "Eutelsat Group",
-        "Inmarsat Viasat": "Inmarsat",
-        "China Telecommunications": "China Telecom",
-        "SES S.A.": "SES",
-        "Ericsson GmbH": "Ericsson",
-        "JSAT": "SKY Perfect JSAT",
-        "NEC Europe Ltd": "NEC",
-        "Fraunhofer IIS": "Fraunhofer",
         "Hugues Network Systems": "Hughes"
     }
-    # Normalize company names within each cell
-    def normalize_companies(company_list, merge_entities):
-        normalized = set()  # Use a set to avoid duplicates within the same cell
-        for company in company_list:
-            normalized_name = merge_entities.get(company.strip(), company.strip())
-            normalized.add(normalized_name)
-        return list(normalized)
-    # Prepare the filtered data
     sources = filtered_data['Source'].dropna()
     split_sources = sources.apply(lambda x: normalize_companies(x.split(', '), merge_entities))
-    # Flatten the list of lists while applying the merge rules
     all_sources = [company for sublist in split_sources for company in sublist]
-    # Count occurrences
     source_counts = Counter(all_sources)
     top_10_sources = source_counts.most_common(10)
@@ -221,7 +203,6 @@ def chart_by_expert(file_path, expert_name):
     top_10_df = pd.DataFrame(top_10_sources, columns=['Company', 'Count'])
     # Plotting
-    #plt.style.use('dark_background')
     fig, ax = plt.subplots(figsize=(14, 11))
     ax.set_facecolor('#222c52')
     fig.patch.set_facecolor('#222c52')
@@ -235,7 +216,7 @@ def chart_by_expert(file_path, expert_name):
     ax.yaxis.label.set_color('white')
     ax.tick_params(axis='x', colors='white', labelsize=12, direction='out', length=6, width=2, rotation=45)
     ax.tick_params(axis='y', colors='white', labelsize=12, direction='out', length=6, width=2)
-    ax.set_title(f"Top 10 Cotributors for Expert '{expert_name}'", color='white', fontsize=16)
     ax.set_xlabel('Company', fontsize=14)
     ax.set_ylabel('Count', fontsize=14)
     ax.yaxis.set_major_locator(ticker.MaxNLocator(integer=True))
@@ -252,6 +233,7 @@ def chart_by_expert(file_path, expert_name):
 # @title Top 10 des entreprises en termes de publications
@@ -313,7 +295,6 @@ def generate_company_chart(file_path):
     top_10_df = pd.DataFrame(top_10_sources, columns=['Company', 'Count'])
     # Plotting
-    #plt.style.use('dark_background')
     fig, ax = plt.subplots(figsize=(14, 12))
     ax.set_facecolor('#222c52')
     fig.patch.set_facecolor('#222c52')
@@ -341,3 +322,4 @@ def generate_company_chart(file_path):
     #plt.show()
     return fig

 import matplotlib.ticker as ticker
 def category_chart(file_path):
     # Load the Excel file
+    df = pd.read_excel(file_path)
+    # Ensure the 'Topic' column exists and drop any rows without a topic
+    if 'Topic' not in df.columns or df['Topic'].isnull().all():
+        raise ValueError("The 'Topic' column is missing or empty.")
+    df.dropna(subset=['Topic'], inplace=True)
+    # Split multiple topics and flatten the list
+    all_topics = [topic.strip() for sublist in df['Topic'].str.split(',').tolist() for topic in sublist if topic]
+    # Count occurrences of each topic
+    topic_counts = Counter(all_topics)
     # Convert to DataFrame for plotting
+    topic_counts_df = pd.DataFrame(topic_counts.items(), columns=['Topic', 'Count']).sort_values('Count', ascending=False)
     # Plotting
+    plt.close('all')
+    fig, ax = plt.subplots(figsize=(14, 7))
     ax.set_facecolor('#222c52')
     fig.patch.set_facecolor('#222c52')
+    colors = ['#08F7FE' if i % 2 == 0 else '#FE53BB' for i in range(len(topic_counts_df))]
+    topic_counts_df.plot(kind='bar', x='Topic', y='Count', ax=ax, color=colors, edgecolor=colors, alpha=0.7, linewidth=2, legend=None)
     ax.xaxis.label.set_color('white')
     ax.yaxis.label.set_color('white')
+    ax.tick_params(axis='x', colors='white', labelsize=10, direction='out', length=6, width=2, rotation=45)
+    ax.tick_params(axis='y', colors='white', labelsize=10, direction='out', length=6, width=2)
+    ax.set_title('Topic Frequency Distribution', color='white', fontsize=16)
+    ax.set_xlabel('Topic', fontsize=14)
     ax.set_ylabel('Count', fontsize=14)
     ax.grid(True, which='both', axis='y', color='gray', linestyle='-', linewidth=0.5, alpha=0.5)
     ax.set_axisbelow(True)
     for spine in ax.spines.values():
         spine.set_color('white')
+        spine.set_linewidth(1)
     ax.spines['right'].set_visible(False)
     ax.spines['top'].set_visible(False)
     return fig
 def company_document_type(file_path, company_names):
     plt.close('all')
     if isinstance(company_names, str):
+        company_names = [name.strip() for name in company_names.split(',')]
     df = pd.read_excel(file_path)
     fig, ax = plt.subplots(figsize=(14, 8))
     ax.set_facecolor('#222c52')
     fig.patch.set_facecolor('#222c52')
+    colors = ['#08F7FE', '#FE53BB', '#fff236']  # Add more colors if necessary
     max_count = 0
     for index, company_name in enumerate(company_names):
             max_count = max(y_data)
     ax.set_xticks(range(len(all_document_types)))
+    ax.set_xticklabels(all_document_types, rotation=45, fontsize=12, fontweight='bold', color='white')
     ax.yaxis.set_major_locator(ticker.MaxNLocator(integer=True))
     ax.set_ylabel('Count', color='white')
+    ax.set_title('Document Types Contributed by Companies', color='white')
     ax.grid(True, which='both', axis='both', color='gray', linestyle='-', linewidth=0.5, alpha=0.5)
     ax.set_axisbelow(True)
     ax.spines['right'].set_visible(False)
     ax.spines['top'].set_visible(False)
     ax.spines['left'].set_position(('data', 0))
+    plt.legend(facecolor='#222c52', edgecolor='white', fontsize=12, labelcolor='white')
     return fig
+def get_expert(file_path):
+    # Load the Excel file
+    df = pd.read_excel(file_path)
+    # Ensure the 'Expert' column exists
+    if 'Expert' not in df.columns:
+        raise ValueError("The 'Expert' column is missing from the provided file.")
+    # Combine all the experts into a single list, accounting for multiple experts per row
+    all_experts = []
+    for experts in df['Expert'].dropna().unique():
+        all_experts.extend([expert.strip() for expert in experts.split(',')])
+    # Get unique experts and return them
+    unique_experts = sorted(set(all_experts))
+    return gr.update(choices=list(unique_experts))
 def chart_by_expert(file_path, expert_name):
     plt.close('all')
     # Load the Excel file
     data = pd.read_excel(file_path)
+    # Normalize the expert's name if it follows a specific format; otherwise, adjust accordingly
     parts = expert_name.split('/')
+    name = parts[1].strip() if len(parts) > 1 else expert_name.strip()
+    # Normalize function for companies, similar to the original code
+    def normalize_companies(company_list, merge_entities):
+        normalized = set()
+        for company in company_list:
+            normalized_name = merge_entities.get(company.strip(), company.strip())
+            normalized.add(normalized_name)
+        return list(normalized)
+    # Define merge entities mapping, as provided
     merge_entities = {
         "Nokia Shanghai Bell": "Nokia",
         "Qualcomm Korea": "Qualcomm",
+        # Add all other mappings as per the original code
+        # ...
         "Hugues Network Systems": "Hughes"
     }
+    # Adjust data processing to handle multiple experts and sources
+    # Flatten and normalize the source field across relevant rows
+    data['ExpertsList'] = data['Expert'].dropna().apply(lambda x: [expert.strip() for expert in x.split(',')])
+    data_exploded = data.explode('ExpertsList')
+    # Filter the data for the specified expert and handle multiple sources
+    filtered_data = data_exploded[data_exploded['ExpertsList'].str.contains(name, case=False, na=False)]
     sources = filtered_data['Source'].dropna()
     split_sources = sources.apply(lambda x: normalize_companies(x.split(', '), merge_entities))
     all_sources = [company for sublist in split_sources for company in sublist]
+    # Count occurrences and get the top 10
     source_counts = Counter(all_sources)
     top_10_sources = source_counts.most_common(10)
     top_10_df = pd.DataFrame(top_10_sources, columns=['Company', 'Count'])
     # Plotting
     fig, ax = plt.subplots(figsize=(14, 11))
     ax.set_facecolor('#222c52')
     fig.patch.set_facecolor('#222c52')
     ax.yaxis.label.set_color('white')
     ax.tick_params(axis='x', colors='white', labelsize=12, direction='out', length=6, width=2, rotation=45)
     ax.tick_params(axis='y', colors='white', labelsize=12, direction='out', length=6, width=2)
+    ax.set_title(f"Top 10 Contributors for Expert '{expert_name}'", color='white', fontsize=16)
     ax.set_xlabel('Company', fontsize=14)
     ax.set_ylabel('Count', fontsize=14)
     ax.yaxis.set_major_locator(ticker.MaxNLocator(integer=True))
 # @title Top 10 des entreprises en termes de publications
     top_10_df = pd.DataFrame(top_10_sources, columns=['Company', 'Count'])
     # Plotting
     fig, ax = plt.subplots(figsize=(14, 12))
     ax.set_facecolor('#222c52')
     fig.patch.set_facecolor('#222c52')
     #plt.show()
     return fig