Spaces:
Sleeping
Sleeping
Update charts_advanced.py
Browse files- charts_advanced.py +65 -83
charts_advanced.py
CHANGED
|
@@ -4,63 +4,48 @@ from collections import Counter
|
|
| 4 |
import matplotlib.ticker as ticker
|
| 5 |
|
| 6 |
def category_chart(file_path):
|
| 7 |
-
plt.close('all')
|
| 8 |
-
# Define expert to specialty mapping
|
| 9 |
-
expert_specialties = {
|
| 10 |
-
"mireille": "Security Trust",
|
| 11 |
-
"khawla": "Network Security",
|
| 12 |
-
"guillaume": "Distributed Networks",
|
| 13 |
-
"vincent": "USIM Management",
|
| 14 |
-
"pierre": "Eco-Design",
|
| 15 |
-
"ly-thanh": "Trend Analysis",
|
| 16 |
-
"nicolas": "Satellite Networks",
|
| 17 |
-
"dorin": "Emergency Communication"
|
| 18 |
-
}
|
| 19 |
-
|
| 20 |
# Load the Excel file
|
| 21 |
-
|
| 22 |
|
| 23 |
-
#
|
| 24 |
-
|
| 25 |
-
|
| 26 |
|
| 27 |
-
|
| 28 |
-
specialties = experts.apply(lambda expert: expert_specialties.get(expert.strip(), "Other"))
|
| 29 |
|
| 30 |
-
#
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
# Convert to DataFrame for plotting
|
| 34 |
-
|
| 35 |
-
specialty_counts_df.columns = ['Specialty', 'Count']
|
| 36 |
|
| 37 |
# Plotting
|
| 38 |
-
|
| 39 |
-
fig, ax = plt.subplots(figsize=(14,
|
| 40 |
ax.set_facecolor('#222c52')
|
| 41 |
fig.patch.set_facecolor('#222c52')
|
| 42 |
|
| 43 |
-
#
|
| 44 |
-
|
| 45 |
-
specialty_counts_df.plot(kind='bar', x='Specialty', y='Count', ax=ax, color=colors, edgecolor=colors, alpha=0.5, linewidth=5, legend=None)
|
| 46 |
|
| 47 |
-
# Set chart details
|
| 48 |
ax.xaxis.label.set_color('white')
|
| 49 |
ax.yaxis.label.set_color('white')
|
| 50 |
-
ax.tick_params(axis='x', colors='white', labelsize=
|
| 51 |
-
ax.tick_params(axis='y', colors='white', labelsize=
|
| 52 |
-
ax.set_title('
|
| 53 |
-
ax.set_xlabel('
|
| 54 |
ax.set_ylabel('Count', fontsize=14)
|
| 55 |
ax.grid(True, which='both', axis='y', color='gray', linestyle='-', linewidth=0.5, alpha=0.5)
|
| 56 |
ax.set_axisbelow(True)
|
| 57 |
|
| 58 |
for spine in ax.spines.values():
|
| 59 |
spine.set_color('white')
|
| 60 |
-
spine.set_linewidth(
|
| 61 |
ax.spines['right'].set_visible(False)
|
| 62 |
ax.spines['top'].set_visible(False)
|
| 63 |
-
|
| 64 |
return fig
|
| 65 |
|
| 66 |
|
|
@@ -108,17 +93,16 @@ def plot_glowing_line_with_dots_enhanced(ax, x, y, color, label, glow_size=10, b
|
|
| 108 |
|
| 109 |
def company_document_type(file_path, company_names):
|
| 110 |
plt.close('all')
|
| 111 |
-
# Convert company_names to a list if it's a string
|
| 112 |
if isinstance(company_names, str):
|
| 113 |
-
company_names = [name.strip() for name in company_names.split(',')]
|
| 114 |
|
| 115 |
df = pd.read_excel(file_path)
|
| 116 |
-
|
| 117 |
fig, ax = plt.subplots(figsize=(14, 8))
|
| 118 |
ax.set_facecolor('#222c52')
|
| 119 |
fig.patch.set_facecolor('#222c52')
|
| 120 |
|
| 121 |
-
colors = ['#08F7FE', '#FE53BB', '#fff236'] #
|
| 122 |
|
| 123 |
max_count = 0
|
| 124 |
for index, company_name in enumerate(company_names):
|
|
@@ -136,10 +120,10 @@ def company_document_type(file_path, company_names):
|
|
| 136 |
max_count = max(y_data)
|
| 137 |
|
| 138 |
ax.set_xticks(range(len(all_document_types)))
|
| 139 |
-
ax.set_xticklabels(all_document_types, rotation=45, fontsize=12, fontweight='bold')
|
| 140 |
ax.yaxis.set_major_locator(ticker.MaxNLocator(integer=True))
|
| 141 |
ax.set_ylabel('Count', color='white')
|
| 142 |
-
ax.set_title('Document Types Contributed by Companies')
|
| 143 |
ax.grid(True, which='both', axis='both', color='gray', linestyle='-', linewidth=0.5, alpha=0.5)
|
| 144 |
ax.set_axisbelow(True)
|
| 145 |
|
|
@@ -152,68 +136,66 @@ def company_document_type(file_path, company_names):
|
|
| 152 |
ax.spines['right'].set_visible(False)
|
| 153 |
ax.spines['top'].set_visible(False)
|
| 154 |
ax.spines['left'].set_position(('data', 0))
|
| 155 |
-
plt.legend(facecolor='#222c52', edgecolor='white', fontsize=12)
|
| 156 |
|
| 157 |
return fig
|
| 158 |
|
| 159 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
|
| 161 |
def chart_by_expert(file_path, expert_name):
|
| 162 |
plt.close('all')
|
| 163 |
# Load the Excel file
|
| 164 |
data = pd.read_excel(file_path)
|
| 165 |
-
|
|
|
|
| 166 |
parts = expert_name.split('/')
|
|
|
|
| 167 |
|
| 168 |
-
#
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
|
|
|
|
|
|
|
|
|
| 172 |
|
| 173 |
-
# Define merge entities mapping
|
| 174 |
merge_entities = {
|
| 175 |
"Nokia Shanghai Bell": "Nokia",
|
| 176 |
"Qualcomm Korea": "Qualcomm",
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
"Hughes Network Systems": "Hughes",
|
| 180 |
-
"HUGHES Network Systems": "Hughes",
|
| 181 |
-
"Hughes Network systems": "Hughes",
|
| 182 |
-
"HUGHES Network Systems Ltd": "Hughes",
|
| 183 |
-
"KT Corp.": "KT Corporation",
|
| 184 |
-
"LG Electronics Inc.": "LG Electronics",
|
| 185 |
-
"LG Uplus": "LG Electronics",
|
| 186 |
-
"OPPO (chongqing) Intelligence": "OPPO",
|
| 187 |
-
"Samsung Electronics GmbH": "Samsung",
|
| 188 |
-
"China Mobile International Ltd": "China Mobile",
|
| 189 |
-
"NOVAMINT": "Novamint",
|
| 190 |
-
"Eutelsat": "Eutelsat Group",
|
| 191 |
-
"Inmarsat Viasat": "Inmarsat",
|
| 192 |
-
"China Telecommunications": "China Telecom",
|
| 193 |
-
"SES S.A.": "SES",
|
| 194 |
-
"Ericsson GmbH": "Ericsson",
|
| 195 |
-
"JSAT": "SKY Perfect JSAT",
|
| 196 |
-
"NEC Europe Ltd": "NEC",
|
| 197 |
-
"Fraunhofer IIS": "Fraunhofer",
|
| 198 |
"Hugues Network Systems": "Hughes"
|
| 199 |
}
|
| 200 |
|
| 201 |
-
#
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
normalized_name = merge_entities.get(company.strip(), company.strip())
|
| 206 |
-
normalized.add(normalized_name)
|
| 207 |
-
return list(normalized)
|
| 208 |
|
| 209 |
-
#
|
|
|
|
| 210 |
sources = filtered_data['Source'].dropna()
|
| 211 |
split_sources = sources.apply(lambda x: normalize_companies(x.split(', '), merge_entities))
|
| 212 |
-
|
| 213 |
-
# Flatten the list of lists while applying the merge rules
|
| 214 |
all_sources = [company for sublist in split_sources for company in sublist]
|
| 215 |
|
| 216 |
-
# Count occurrences
|
| 217 |
source_counts = Counter(all_sources)
|
| 218 |
top_10_sources = source_counts.most_common(10)
|
| 219 |
|
|
@@ -221,7 +203,6 @@ def chart_by_expert(file_path, expert_name):
|
|
| 221 |
top_10_df = pd.DataFrame(top_10_sources, columns=['Company', 'Count'])
|
| 222 |
|
| 223 |
# Plotting
|
| 224 |
-
#plt.style.use('dark_background')
|
| 225 |
fig, ax = plt.subplots(figsize=(14, 11))
|
| 226 |
ax.set_facecolor('#222c52')
|
| 227 |
fig.patch.set_facecolor('#222c52')
|
|
@@ -235,7 +216,7 @@ def chart_by_expert(file_path, expert_name):
|
|
| 235 |
ax.yaxis.label.set_color('white')
|
| 236 |
ax.tick_params(axis='x', colors='white', labelsize=12, direction='out', length=6, width=2, rotation=45)
|
| 237 |
ax.tick_params(axis='y', colors='white', labelsize=12, direction='out', length=6, width=2)
|
| 238 |
-
ax.set_title(f"Top 10
|
| 239 |
ax.set_xlabel('Company', fontsize=14)
|
| 240 |
ax.set_ylabel('Count', fontsize=14)
|
| 241 |
ax.yaxis.set_major_locator(ticker.MaxNLocator(integer=True))
|
|
@@ -252,6 +233,7 @@ def chart_by_expert(file_path, expert_name):
|
|
| 252 |
|
| 253 |
|
| 254 |
|
|
|
|
| 255 |
# @title Top 10 des entreprises en termes de publications
|
| 256 |
|
| 257 |
|
|
@@ -313,7 +295,6 @@ def generate_company_chart(file_path):
|
|
| 313 |
top_10_df = pd.DataFrame(top_10_sources, columns=['Company', 'Count'])
|
| 314 |
|
| 315 |
# Plotting
|
| 316 |
-
#plt.style.use('dark_background')
|
| 317 |
fig, ax = plt.subplots(figsize=(14, 12))
|
| 318 |
ax.set_facecolor('#222c52')
|
| 319 |
fig.patch.set_facecolor('#222c52')
|
|
@@ -341,3 +322,4 @@ def generate_company_chart(file_path):
|
|
| 341 |
|
| 342 |
#plt.show()
|
| 343 |
return fig
|
|
|
|
|
|
| 4 |
import matplotlib.ticker as ticker
|
| 5 |
|
| 6 |
def category_chart(file_path):
    """Build a bar chart of how often each topic appears in an Excel sheet.

    A cell of the 'Topic' column may list several topics separated by
    commas; every listed topic is counted once per occurrence and the bars
    are ordered by descending count.

    Args:
        file_path: Passed straight to ``pandas.read_excel``.

    Returns:
        The matplotlib figure that holds the bar chart.

    Raises:
        ValueError: If the 'Topic' column is missing, has no values, or
            contains only blank entries.
    """
    # Load the Excel file
    df = pd.read_excel(file_path)

    # Ensure the 'Topic' column exists and actually carries data
    if 'Topic' not in df.columns or df['Topic'].isnull().all():
        raise ValueError("The 'Topic' column is missing or empty.")

    # Work on a filtered copy of the column instead of mutating the frame;
    # astype(str) keeps non-text cells (e.g. numbers) from breaking the split.
    topic_cells = df['Topic'].dropna().astype(str)

    # Strip *before* filtering so blank pieces ("a, ,b", trailing commas)
    # are dropped rather than being counted as an empty-string topic.
    topic_counts = Counter(
        topic
        for cell in topic_cells
        for topic in (piece.strip() for piece in cell.split(','))
        if topic
    )
    if not topic_counts:
        # Only blank entries were present; there is nothing to plot.
        raise ValueError("The 'Topic' column is missing or empty.")

    # Convert to DataFrame for plotting
    topic_counts_df = pd.DataFrame(
        list(topic_counts.items()), columns=['Topic', 'Count']
    ).sort_values('Count', ascending=False)

    # Plotting
    plt.close('all')
    fig, ax = plt.subplots(figsize=(14, 7))
    ax.set_facecolor('#222c52')
    fig.patch.set_facecolor('#222c52')

    # Alternate the two accent colours from bar to bar
    colors = ['#08F7FE' if i % 2 == 0 else '#FE53BB' for i in range(len(topic_counts_df))]
    topic_counts_df.plot(kind='bar', x='Topic', y='Count', ax=ax, color=colors, edgecolor=colors, alpha=0.7, linewidth=2, legend=None)

    ax.xaxis.label.set_color('white')
    ax.yaxis.label.set_color('white')
    ax.tick_params(axis='x', colors='white', labelsize=10, direction='out', length=6, width=2, rotation=45)
    ax.tick_params(axis='y', colors='white', labelsize=10, direction='out', length=6, width=2)
    ax.set_title('Topic Frequency Distribution', color='white', fontsize=16)
    ax.set_xlabel('Topic', fontsize=14)
    ax.set_ylabel('Count', fontsize=14)
    ax.grid(True, which='both', axis='y', color='gray', linestyle='-', linewidth=0.5, alpha=0.5)
    ax.set_axisbelow(True)

    for spine in ax.spines.values():
        spine.set_color('white')
        spine.set_linewidth(1)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    return fig
|
| 50 |
|
| 51 |
|
|
|
|
| 93 |
|
| 94 |
def company_document_type(file_path, company_names):
|
| 95 |
plt.close('all')
|
|
|
|
| 96 |
if isinstance(company_names, str):
|
| 97 |
+
company_names = [name.strip() for name in company_names.split(',')]
|
| 98 |
|
| 99 |
df = pd.read_excel(file_path)
|
| 100 |
+
|
| 101 |
fig, ax = plt.subplots(figsize=(14, 8))
|
| 102 |
ax.set_facecolor('#222c52')
|
| 103 |
fig.patch.set_facecolor('#222c52')
|
| 104 |
|
| 105 |
+
colors = ['#08F7FE', '#FE53BB', '#fff236'] # Add more colors if necessary
|
| 106 |
|
| 107 |
max_count = 0
|
| 108 |
for index, company_name in enumerate(company_names):
|
|
|
|
| 120 |
max_count = max(y_data)
|
| 121 |
|
| 122 |
ax.set_xticks(range(len(all_document_types)))
|
| 123 |
+
ax.set_xticklabels(all_document_types, rotation=45, fontsize=12, fontweight='bold', color='white')
|
| 124 |
ax.yaxis.set_major_locator(ticker.MaxNLocator(integer=True))
|
| 125 |
ax.set_ylabel('Count', color='white')
|
| 126 |
+
ax.set_title('Document Types Contributed by Companies', color='white')
|
| 127 |
ax.grid(True, which='both', axis='both', color='gray', linestyle='-', linewidth=0.5, alpha=0.5)
|
| 128 |
ax.set_axisbelow(True)
|
| 129 |
|
|
|
|
| 136 |
ax.spines['right'].set_visible(False)
|
| 137 |
ax.spines['top'].set_visible(False)
|
| 138 |
ax.spines['left'].set_position(('data', 0))
|
| 139 |
+
plt.legend(facecolor='#222c52', edgecolor='white', fontsize=12, labelcolor='white')
|
| 140 |
|
| 141 |
return fig
|
| 142 |
|
| 143 |
|
| 144 |
+
def get_expert(file_path):
    """Collect the distinct expert names listed in an Excel sheet.

    A cell of the 'Expert' column may name several experts separated by
    commas; each name is stripped of surrounding whitespace and de-duplicated.

    Args:
        file_path: Passed straight to ``pandas.read_excel``.

    Returns:
        Whatever ``gr.update(choices=...)`` returns, given the sorted list of
        unique names (``gr`` is presumably the gradio module imported
        elsewhere in this file -- confirm).

    Raises:
        ValueError: If the sheet has no 'Expert' column.
    """
    # Load the Excel file
    df = pd.read_excel(file_path)

    # Ensure the 'Expert' column exists
    if 'Expert' not in df.columns:
        raise ValueError("The 'Expert' column is missing from the provided file.")

    # Flatten the comma-separated cells into one set of names.
    # str(cell) keeps non-text cells from crashing on .split().
    unique_experts = {
        name
        for cell in df['Expert'].dropna().unique()
        for name in (part.strip() for part in str(cell).split(','))
        if name  # skip blanks left by trailing or doubled commas
    }

    return gr.update(choices=sorted(unique_experts))
|
| 160 |
|
| 161 |
def chart_by_expert(file_path, expert_name):
|
| 162 |
plt.close('all')
|
| 163 |
# Load the Excel file
|
| 164 |
data = pd.read_excel(file_path)
|
| 165 |
+
|
| 166 |
+
# Normalize the expert's name if it follows a specific format; otherwise, adjust accordingly
|
| 167 |
parts = expert_name.split('/')
|
| 168 |
+
name = parts[1].strip() if len(parts) > 1 else expert_name.strip()
|
| 169 |
|
| 170 |
+
# Normalize function for companies, similar to the original code
|
| 171 |
+
def normalize_companies(company_list, merge_entities):
|
| 172 |
+
normalized = set()
|
| 173 |
+
for company in company_list:
|
| 174 |
+
normalized_name = merge_entities.get(company.strip(), company.strip())
|
| 175 |
+
normalized.add(normalized_name)
|
| 176 |
+
return list(normalized)
|
| 177 |
|
| 178 |
+
# Define merge entities mapping, as provided
|
| 179 |
merge_entities = {
|
| 180 |
"Nokia Shanghai Bell": "Nokia",
|
| 181 |
"Qualcomm Korea": "Qualcomm",
|
| 182 |
+
# Add all other mappings as per the original code
|
| 183 |
+
# ...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
"Hugues Network Systems": "Hughes"
|
| 185 |
}
|
| 186 |
|
| 187 |
+
# Adjust data processing to handle multiple experts and sources
|
| 188 |
+
# Flatten and normalize the source field across relevant rows
|
| 189 |
+
data['ExpertsList'] = data['Expert'].dropna().apply(lambda x: [expert.strip() for expert in x.split(',')])
|
| 190 |
+
data_exploded = data.explode('ExpertsList')
|
|
|
|
|
|
|
|
|
|
| 191 |
|
| 192 |
+
# Filter the data for the specified expert and handle multiple sources
|
| 193 |
+
filtered_data = data_exploded[data_exploded['ExpertsList'].str.contains(name, case=False, na=False)]
|
| 194 |
sources = filtered_data['Source'].dropna()
|
| 195 |
split_sources = sources.apply(lambda x: normalize_companies(x.split(', '), merge_entities))
|
|
|
|
|
|
|
| 196 |
all_sources = [company for sublist in split_sources for company in sublist]
|
| 197 |
|
| 198 |
+
# Count occurrences and get the top 10
|
| 199 |
source_counts = Counter(all_sources)
|
| 200 |
top_10_sources = source_counts.most_common(10)
|
| 201 |
|
|
|
|
| 203 |
top_10_df = pd.DataFrame(top_10_sources, columns=['Company', 'Count'])
|
| 204 |
|
| 205 |
# Plotting
|
|
|
|
| 206 |
fig, ax = plt.subplots(figsize=(14, 11))
|
| 207 |
ax.set_facecolor('#222c52')
|
| 208 |
fig.patch.set_facecolor('#222c52')
|
|
|
|
| 216 |
ax.yaxis.label.set_color('white')
|
| 217 |
ax.tick_params(axis='x', colors='white', labelsize=12, direction='out', length=6, width=2, rotation=45)
|
| 218 |
ax.tick_params(axis='y', colors='white', labelsize=12, direction='out', length=6, width=2)
|
| 219 |
+
ax.set_title(f"Top 10 Contributors for Expert '{expert_name}'", color='white', fontsize=16)
|
| 220 |
ax.set_xlabel('Company', fontsize=14)
|
| 221 |
ax.set_ylabel('Count', fontsize=14)
|
| 222 |
ax.yaxis.set_major_locator(ticker.MaxNLocator(integer=True))
|
|
|
|
| 233 |
|
| 234 |
|
| 235 |
|
| 236 |
+
|
| 237 |
# @title Top 10 des entreprises en termes de publications
|
| 238 |
|
| 239 |
|
|
|
|
| 295 |
top_10_df = pd.DataFrame(top_10_sources, columns=['Company', 'Count'])
|
| 296 |
|
| 297 |
# Plotting
|
|
|
|
| 298 |
fig, ax = plt.subplots(figsize=(14, 12))
|
| 299 |
ax.set_facecolor('#222c52')
|
| 300 |
fig.patch.set_facecolor('#222c52')
|
|
|
|
| 322 |
|
| 323 |
#plt.show()
|
| 324 |
return fig
|
| 325 |
+
|