Spaces:
Sleeping
Sleeping
new model and code structure
Browse files- functions.py +73 -0
- indicator_harmonizer.ipynb +475 -0
functions.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import re
|
| 4 |
+
import numpy as np
|
| 5 |
+
import importlib
|
| 6 |
+
import importlib
|
| 7 |
+
from seatable_api import Base, context
|
| 8 |
+
from pandas import json_normalize
|
| 9 |
+
from transformers import AutoTokenizer, AutoModel
|
| 10 |
+
import torch
|
| 11 |
+
import torch.nn.functional as F
|
| 12 |
+
|
| 13 |
+
### Parameters not expected to be changed in every run
|
| 14 |
+
|
| 15 |
+
# columns to use for embeddings on table 1
|
| 16 |
+
|
| 17 |
+
columns_embeddings_col1 = ['Indicator Name']
|
| 18 |
+
|
| 19 |
+
# columns to use for embeddings on table 2
|
| 20 |
+
columns_embeddings_col2 = ['Indicator name (leonardo)']
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
# ID column
|
| 24 |
+
|
| 25 |
+
table1_id_col = ['Indicator ID']
|
| 26 |
+
table2_id_col = ['ID']
|
| 27 |
+
|
| 28 |
+
#### Functions
|
| 29 |
+
|
| 30 |
+
from numpy.linalg import norm
|
| 31 |
+
|
| 32 |
+
# Define cosine similarity function
|
| 33 |
+
def cos_sim(a, b):
    """Return the cosine similarity between the vectors held in ``a`` and ``b``.

    Each argument may be a single vector (1-D) or a matrix with one vector
    per row (2-D). For two matrices the result is the pairwise matrix whose
    entry ``[i, j]`` is the cosine similarity of ``a[i]`` and ``b[j]``; for
    two 1-D vectors it is a single value.

    The earlier one-line version divided by ``norm(a) * norm(b)``. For 2-D
    input that is the product of the whole-matrix (Frobenius) norms, so every
    entry was scaled by one global constant instead of by the norms of the two
    rows being compared. Norms are now computed per row.

    Only ``@``, ``.T``, ``.sum(axis=...)``, ``**`` and ``.reshape`` are used,
    so the result keeps the array type of the inputs.

    Args:
        a: Real-valued 1-D vector or 2-D matrix of row vectors.
        b: Real-valued 1-D vector or 2-D matrix of row vectors with the same
            vector length as ``a``.

    Returns:
        The dot products ``a @ b.T`` divided element-wise by the product of
        the corresponding vector norms. Zero-length vectors yield a division
        by zero, as before.
    """
    dots = a @ b.T
    # Euclidean norm of every vector (last axis), not of the whole matrix.
    a_norms = (a * a).sum(axis=-1) ** 0.5
    b_norms = (b * b).sum(axis=-1) ** 0.5
    # Outer product of the norms, then collapsed to the shape of ``dots`` so
    # that 1-D inputs still produce a vector / scalar result.
    scale = a_norms.reshape(-1, 1) * b_norms.reshape(1, -1)
    return dots / scale.reshape(dots.shape)
|
| 34 |
+
|
| 35 |
+
def concatenate_columns(df, columns):
    """Add a ``concatenated_input`` column joining ``columns`` row by row.

    Every selected value is converted to ``str`` and the values of one row
    are joined with ``"."`` as the separator.

    Args:
        df: pandas DataFrame to extend. It is modified in place.
        columns: Sequence of column labels whose values are joined, in the
            given order.

    Returns:
        The same ``df`` object, now carrying the ``concatenated_input`` column.

    Raises:
        ValueError: If any label in ``columns`` is not a column of ``df``.
    """
    # Check if all specified columns exist in the DataFrame
    absent = [name for name in columns if name not in df.columns]
    if absent:
        raise ValueError(
            f"One or more specified columns do not exist in the DataFrame: {absent}"
        )

    # Concatenate the specified columns with a period as the separator
    as_text = df[columns].astype(str)
    df['concatenated_input'] = as_text.agg('.'.join, axis=1)
    return df
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
# Define the function for mean pooling
|
| 46 |
+
def mean_pooling(model_output, attention_mask):
    """Average the token embeddings, weighting each position by the mask.

    Args:
        model_output: Indexable model result whose first element contains the
            last hidden states (the token embeddings).
        attention_mask: Tensor with one fewer trailing dimension than the
            token embeddings; it is expanded across the embedding dimension.
            Presumably 1 for real tokens and 0 for padding - confirm against
            the tokenizer in use.

    Returns:
        Tensor holding the mask-weighted sum of the token embeddings over
        dimension 1, divided by the sum of the mask over dimension 1.
    """
    token_embeddings = model_output[0]  # First element of model_output contains last hidden states
    mask = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    summed = torch.sum(token_embeddings * mask, 1)
    # Clamp so a row whose mask is all zeros divides by 1e-9 rather than by 0.
    counts = torch.clamp(mask.sum(1), min=1e-9)
    return summed / counts
|
| 52 |
+
|
| 53 |
+
# Define your get_embbedings function
|
| 54 |
+
def get_embbedings(table, colname, model_name='sentence-transformers/all-MiniLM-L6-v2'):
    """Compute one L2-normalized sentence embedding per row of ``table[colname]``.

    The tokenizer and model are loaded on every call, and all texts are
    encoded as a single batch.

    Args:
        table: Object supporting ``table[colname].tolist()`` (the notebook
            passes a pandas DataFrame).
        colname: Label of the column holding the texts to embed.
        model_name: Identifier passed to ``AutoTokenizer.from_pretrained`` and
            ``AutoModel.from_pretrained``. Defaults to the model that was
            previously hard-coded, so existing two-argument calls behave
            as before.

    Returns:
        2-D tensor with one mean-pooled, L2-normalized embedding per input
        text (normalization is applied along dimension 1).
    """
    # Initialize tokenizer and model
    # Load model from HuggingFace Hub
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)

    # Tokenize sentences
    encoded_input = tokenizer(
        table[colname].tolist(),
        padding=True,
        truncation=True,
        return_tensors='pt',
    )

    # Compute token embeddings; gradients are not needed for inference
    with torch.no_grad():
        model_output = model(**encoded_input)

    # Perform pooling
    sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])

    # Normalize embeddings
    return F.normalize(sentence_embeddings, p=2, dim=1)
|
indicator_harmonizer.ipynb
ADDED
|
@@ -0,0 +1,475 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"# Indicator Harmonizer"
|
| 8 |
+
]
|
| 9 |
+
},
|
| 10 |
+
{
|
| 11 |
+
"cell_type": "markdown",
|
| 12 |
+
"metadata": {},
|
| 13 |
+
"source": [
|
| 14 |
+
"The goal of this code is to provide a recommendation of indicators and detect cases where we might need to create new indicators"
|
| 15 |
+
]
|
| 16 |
+
},
|
| 17 |
+
{
|
| 18 |
+
"cell_type": "markdown",
|
| 19 |
+
"metadata": {},
|
| 20 |
+
"source": [
|
| 21 |
+
"## 1 Load required packages"
|
| 22 |
+
]
|
| 23 |
+
},
|
| 24 |
+
{
|
| 25 |
+
"cell_type": "code",
|
| 26 |
+
"execution_count": 122,
|
| 27 |
+
"metadata": {},
|
| 28 |
+
"outputs": [],
|
| 29 |
+
"source": [
|
| 30 |
+
"#! pip install transformers\n",
|
| 31 |
+
"#! pip install torch\n",
|
| 32 |
+
"#! pip install scipy\n",
|
| 33 |
+
"#! pip install seaborn"
|
| 34 |
+
]
|
| 35 |
+
},
|
| 36 |
+
{
|
| 37 |
+
"cell_type": "code",
|
| 38 |
+
"execution_count": 10,
|
| 39 |
+
"metadata": {},
|
| 40 |
+
"outputs": [
|
| 41 |
+
{
|
| 42 |
+
"data": {
|
| 43 |
+
"text/plain": [
|
| 44 |
+
"<module 'functions' from '/Users/alanfortunysicart/Documents/GitHub/IndicatorHarmonizer/functions.py'>"
|
| 45 |
+
]
|
| 46 |
+
},
|
| 47 |
+
"execution_count": 10,
|
| 48 |
+
"metadata": {},
|
| 49 |
+
"output_type": "execute_result"
|
| 50 |
+
}
|
| 51 |
+
],
|
| 52 |
+
"source": [
|
| 53 |
+
"import pandas as pd\n",
|
| 54 |
+
"import re\n",
|
| 55 |
+
"import numpy as np\n",
|
| 56 |
+
"import importlib\n",
|
| 57 |
+
"import importlib\n",
|
| 58 |
+
"from seatable_api import Base, context\n",
|
| 59 |
+
"from pandas import json_normalize\n",
|
| 60 |
+
"import importlib\n",
|
| 61 |
+
"import functions as f\n",
|
| 62 |
+
"importlib.reload(f)"
|
| 63 |
+
]
|
| 64 |
+
},
|
| 65 |
+
{
|
| 66 |
+
"cell_type": "markdown",
|
| 67 |
+
"metadata": {},
|
| 68 |
+
"source": [
|
| 69 |
+
"# 2. Load the required data"
|
| 70 |
+
]
|
| 71 |
+
},
|
| 72 |
+
{
|
| 73 |
+
"cell_type": "markdown",
|
| 74 |
+
"metadata": {},
|
| 75 |
+
"source": [
|
| 76 |
+
"Load the request data including the generic indicator requests"
|
| 77 |
+
]
|
| 78 |
+
},
|
| 79 |
+
{
|
| 80 |
+
"cell_type": "code",
|
| 81 |
+
"execution_count": 11,
|
| 82 |
+
"metadata": {},
|
| 83 |
+
"outputs": [],
|
| 84 |
+
"source": [
|
| 85 |
+
"table1 = pd.read_excel('/Users/alanfortunysicart/Downloads/Indicators_Indicators_Frameworks_Default View.xlsx')\n",
|
| 86 |
+
"table1.columns = ['ID',\t'Indicator Name','Framework','Definition','Description',\t'Indicators leonardo','Framework Version','Impact Category']\n",
|
| 87 |
+
"table1['ID'] = table1['ID'].astype(str)"
|
| 88 |
+
]
|
| 89 |
+
},
|
| 90 |
+
{
|
| 91 |
+
"cell_type": "markdown",
|
| 92 |
+
"metadata": {},
|
| 93 |
+
"source": [
|
| 94 |
+
"Create a column concatenating the column's content to be used for the embeddings"
|
| 95 |
+
]
|
| 96 |
+
},
|
| 97 |
+
{
|
| 98 |
+
"cell_type": "code",
|
| 99 |
+
"execution_count": 13,
|
| 100 |
+
"metadata": {},
|
| 101 |
+
"outputs": [],
|
| 102 |
+
"source": [
|
| 103 |
+
"table1 = f.concatenate_columns(table1, columns=f.columns_embeddings_col1)"
|
| 104 |
+
]
|
| 105 |
+
},
|
| 106 |
+
{
|
| 107 |
+
"cell_type": "markdown",
|
| 108 |
+
"metadata": {},
|
| 109 |
+
"source": [
|
| 110 |
+
"Concatenate topic and indicator request name to help the indicator search"
|
| 111 |
+
]
|
| 112 |
+
},
|
| 113 |
+
{
|
| 114 |
+
"cell_type": "code",
|
| 115 |
+
"execution_count": 14,
|
| 116 |
+
"metadata": {},
|
| 117 |
+
"outputs": [],
|
| 118 |
+
"source": [
|
| 119 |
+
"table2 = pd.read_excel('/Users/alanfortunysicart/Downloads/Indicators_Indicators_Default view(14).xlsx')\n",
|
| 120 |
+
"table2 = f.concatenate_columns(table2,columns=f.columns_embeddings_col2)\n",
|
| 121 |
+
"\n"
|
| 122 |
+
]
|
| 123 |
+
},
|
| 124 |
+
{
|
| 125 |
+
"cell_type": "markdown",
|
| 126 |
+
"metadata": {},
|
| 127 |
+
"source": [
|
| 128 |
+
"# 3. Compute the similarity between the leonardo indicators and the requested names"
|
| 129 |
+
]
|
| 130 |
+
},
|
| 131 |
+
{
|
| 132 |
+
"cell_type": "markdown",
|
| 133 |
+
"metadata": {},
|
| 134 |
+
"source": [
|
| 135 |
+
"### embeddings 1"
|
| 136 |
+
]
|
| 137 |
+
},
|
| 138 |
+
{
|
| 139 |
+
"cell_type": "code",
|
| 140 |
+
"execution_count": 15,
|
| 141 |
+
"metadata": {},
|
| 142 |
+
"outputs": [],
|
| 143 |
+
"source": [
|
| 144 |
+
"embeddings1 = f.get_embbedings(table1, 'concatenated_input')"
|
| 145 |
+
]
|
| 146 |
+
},
|
| 147 |
+
{
|
| 148 |
+
"cell_type": "markdown",
|
| 149 |
+
"metadata": {},
|
| 150 |
+
"source": [
|
| 151 |
+
"### embeddings 2"
|
| 152 |
+
]
|
| 153 |
+
},
|
| 154 |
+
{
|
| 155 |
+
"cell_type": "code",
|
| 156 |
+
"execution_count": 16,
|
| 157 |
+
"metadata": {},
|
| 158 |
+
"outputs": [],
|
| 159 |
+
"source": [
|
| 160 |
+
"embeddings2 = f.get_embbedings(table2,'concatenated_input')"
|
| 161 |
+
]
|
| 162 |
+
},
|
| 163 |
+
{
|
| 164 |
+
"cell_type": "code",
|
| 165 |
+
"execution_count": 17,
|
| 166 |
+
"metadata": {},
|
| 167 |
+
"outputs": [],
|
| 168 |
+
"source": [
|
| 169 |
+
"\n",
|
| 170 |
+
"# Calculate cosine similarity between the embeddings\n",
|
| 171 |
+
"similarities = f.cos_sim(embeddings1, embeddings2)\n"
|
| 172 |
+
]
|
| 173 |
+
},
|
| 174 |
+
{
|
| 175 |
+
"cell_type": "code",
|
| 176 |
+
"execution_count": 19,
|
| 177 |
+
"metadata": {},
|
| 178 |
+
"outputs": [],
|
| 179 |
+
"source": [
|
| 180 |
+
"\n",
|
| 181 |
+
"\n",
|
| 182 |
+
"# Create a DataFrame for the similarities\n",
|
| 183 |
+
"result_df = pd.DataFrame(similarities, \n",
|
| 184 |
+
" columns=table2[f.table2_id_col])\n"
|
| 185 |
+
]
|
| 186 |
+
},
|
| 187 |
+
{
|
| 188 |
+
"cell_type": "code",
|
| 189 |
+
"execution_count": 130,
|
| 190 |
+
"metadata": {},
|
| 191 |
+
"outputs": [],
|
| 192 |
+
"source": [
|
| 193 |
+
"result_df['Indicator Client'] = mapped_indicators['ID']"
|
| 194 |
+
]
|
| 195 |
+
},
|
| 196 |
+
{
|
| 197 |
+
"cell_type": "markdown",
|
| 198 |
+
"metadata": {},
|
| 199 |
+
"source": [
|
| 200 |
+
"Store the similarities in a matrix format"
|
| 201 |
+
]
|
| 202 |
+
},
|
| 203 |
+
{
|
| 204 |
+
"cell_type": "code",
|
| 205 |
+
"execution_count": 131,
|
| 206 |
+
"metadata": {},
|
| 207 |
+
"outputs": [],
|
| 208 |
+
"source": [
|
| 209 |
+
"# Merge the data to get framework information for both client and indicators\n",
|
| 210 |
+
"mapped_indicators_sel = mapped_indicators[['ID', 'Framework']]\n",
|
| 211 |
+
"indicators_df_sel = indicators_df[['ID', 'Framework']]\n",
|
| 212 |
+
"\n",
|
| 213 |
+
"# Prepare a mapping from indicator ID to framework for both client and indicators\n",
|
| 214 |
+
"client_framework_map = mapped_indicators_sel.set_index('ID')['Framework'].to_dict()\n",
|
| 215 |
+
"indicator_framework_map = indicators_df_sel.set_index('ID')['Framework'].to_dict()\n",
|
| 216 |
+
"\n",
|
| 217 |
+
"# Function to check if there is any common framework element\n",
|
| 218 |
+
"def has_common_framework(client_framework, indicator_framework):\n",
|
| 219 |
+
" client_frameworks = set(client_framework.split(', '))\n",
|
| 220 |
+
" indicator_frameworks = set(indicator_framework.split(', '))\n",
|
| 221 |
+
" return not client_frameworks.isdisjoint(indicator_frameworks)\n",
|
| 222 |
+
"\n",
|
| 223 |
+
"# Replace similarity values with NaN where the frameworks match or contain a common element\n",
|
| 224 |
+
"for client_id in mapped_indicators['ID']:\n",
|
| 225 |
+
" client_framework = client_framework_map.get(client_id)\n",
|
| 226 |
+
" for indicator_id in result_df.columns:\n",
|
| 227 |
+
" if indicator_id != 'Indicator Client':\n",
|
| 228 |
+
" indicator_framework = indicator_framework_map.get(indicator_id)\n",
|
| 229 |
+
" if pd.notna(client_framework) and pd.notna(indicator_framework):\n",
|
| 230 |
+
" # Check if there is any common framework element\n",
|
| 231 |
+
" if has_common_framework(client_framework, indicator_framework):\n",
|
| 232 |
+
" result_df.loc[result_df['Indicator Client'] == client_id, indicator_id] = np.nan\n"
|
| 233 |
+
]
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"cell_type": "code",
|
| 237 |
+
"execution_count": 132,
|
| 238 |
+
"metadata": {},
|
| 239 |
+
"outputs": [],
|
| 240 |
+
"source": [
|
| 241 |
+
"result_df = result_df.drop(columns=['Indicator Client'])"
|
| 242 |
+
]
|
| 243 |
+
},
|
| 244 |
+
{
|
| 245 |
+
"cell_type": "markdown",
|
| 246 |
+
"metadata": {},
|
| 247 |
+
"source": [
|
| 248 |
+
"Find which are the top 5 indicators that are closest"
|
| 249 |
+
]
|
| 250 |
+
},
|
| 251 |
+
{
|
| 252 |
+
"cell_type": "code",
|
| 253 |
+
"execution_count": 133,
|
| 254 |
+
"metadata": {},
|
| 255 |
+
"outputs": [],
|
| 256 |
+
"source": [
|
| 257 |
+
"# Function to return the column names of the top 5 values for each row\n",
|
| 258 |
+
"def top_5_column(row):\n",
|
| 259 |
+
" # Find the top 5 values in the row\n",
|
| 260 |
+
" top_5_values = row.nlargest(5)\n",
|
| 261 |
+
" # Return the column names corresponding to these values\n",
|
| 262 |
+
" return top_5_values.index.tolist()\n",
|
| 263 |
+
"\n",
|
| 264 |
+
"# Convert all columns to numeric data types, coercing non-convertible values to NaN\n",
|
| 265 |
+
"#result_df = result_df.iloc[:,1:].apply(pd.to_numeric, errors='coerce')\n",
|
| 266 |
+
"\n",
|
| 267 |
+
"# Get the list of non-numeric columns\n",
|
| 268 |
+
"#non_numeric_columns = result_df.columns[result_df.dtypes == 'object']\n",
|
| 269 |
+
"\n",
|
| 270 |
+
"# Apply the function to each row of the DataFrame, excluding non-numeric columns\n",
|
| 271 |
+
"result_df['Top 5 Column ID'] = result_df.apply(lambda row: top_5_column(row), axis=1)"
|
| 272 |
+
]
|
| 273 |
+
},
|
| 274 |
+
{
|
| 275 |
+
"cell_type": "code",
|
| 276 |
+
"execution_count": 134,
|
| 277 |
+
"metadata": {},
|
| 278 |
+
"outputs": [],
|
| 279 |
+
"source": [
|
| 280 |
+
"# Create a dictionary for fast lookup\n",
|
| 281 |
+
"id_to_name = dict(zip(indicators_df['ID'], indicators_df['Indicator name (leonardo)']))\n",
|
| 282 |
+
"\n",
|
| 283 |
+
"# Function to map IDs to names\n",
|
| 284 |
+
"def map_ids_to_names(id_list):\n",
|
| 285 |
+
" return [id_to_name.get(id, \"ID\") for id in id_list]\n",
|
| 286 |
+
"\n",
|
| 287 |
+
"# Apply the function to the 'Top 5 Column ID' column\n",
|
| 288 |
+
"result_df['Top 5 Names'] = result_df['Top 5 Column ID'].apply(map_ids_to_names)"
|
| 289 |
+
]
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"cell_type": "code",
|
| 293 |
+
"execution_count": 135,
|
| 294 |
+
"metadata": {},
|
| 295 |
+
"outputs": [],
|
| 296 |
+
"source": [
|
| 297 |
+
"result_df[['Indicator Name','ID','framework'] ]= mapped_indicators[['Indicator name (leonardo)','ID','Framework']]"
|
| 298 |
+
]
|
| 299 |
+
},
|
| 300 |
+
{
|
| 301 |
+
"cell_type": "code",
|
| 302 |
+
"execution_count": 136,
|
| 303 |
+
"metadata": {},
|
| 304 |
+
"outputs": [],
|
| 305 |
+
"source": [
|
| 306 |
+
"result_df[[\"top1name\", \"top2name\", \"top3name\", \"top4name\", \"top5name\"]]= pd.DataFrame(result_df['Top 5 Names'].tolist(), columns=[\"top1name\", \"top2name\", \"top3name\", \"top4name\", \"top5name\"])\n",
|
| 307 |
+
"result_df[[\"top1id\", \"top2id\", \"top3id\", \"top4id\", \"top5id\"]]= pd.DataFrame(result_df['Top 5 Column ID'].tolist(), columns=[\"top1id\", \"top2id\", \"top3id\", \"top4id\", \"top5id\"])\n"
|
| 308 |
+
]
|
| 309 |
+
},
|
| 310 |
+
{
|
| 311 |
+
"cell_type": "code",
|
| 312 |
+
"execution_count": 137,
|
| 313 |
+
"metadata": {},
|
| 314 |
+
"outputs": [],
|
| 315 |
+
"source": [
|
| 316 |
+
"result_df['max_sim'] = np.nanmax(similarities, axis=1)\n",
|
| 317 |
+
"\n",
|
| 318 |
+
"# Calculate min and max of the 'max_sim' column, ignoring NaN values\n",
|
| 319 |
+
"min_val = np.nanmin(result_df['max_sim'])\n",
|
| 320 |
+
"max_val = np.nanmax(result_df['max_sim'])\n",
|
| 321 |
+
"\n",
|
| 322 |
+
"# Normalize the 'max_sim' values\n",
|
| 323 |
+
"result_df['max_sim_normalized'] = (result_df['max_sim'] - min_val) / (max_val - min_val)"
|
| 324 |
+
]
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"cell_type": "markdown",
|
| 328 |
+
"metadata": {},
|
| 329 |
+
"source": [
|
| 330 |
+
"# 4 Assess the quality of the similarity, normalizing the similarity score"
|
| 331 |
+
]
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"cell_type": "markdown",
|
| 335 |
+
"metadata": {},
|
| 336 |
+
"source": [
|
| 337 |
+
"Calculate the max similarity to identify how reliable the recommendation is and whether new indicators may be required"
|
| 338 |
+
]
|
| 339 |
+
},
|
| 340 |
+
{
|
| 341 |
+
"cell_type": "code",
|
| 342 |
+
"execution_count": 138,
|
| 343 |
+
"metadata": {},
|
| 344 |
+
"outputs": [
|
| 345 |
+
{
|
| 346 |
+
"data": {
|
| 347 |
+
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1IAAAIjCAYAAAAJLyrXAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAABD+0lEQVR4nO3dfXzO9f////vBjh0zNjPs7N3Mcl5OCu/2VsrEnNUUejcmzSL1joROpN5yknd8VBKRdyfOKhFvSSWZkE6onKVTNafJRoix1Ry21++Pfo5vaxt7HnbsOA5u18tll7yer+fxPB6veVw2914nh82yLEsAAAAAgDKr5O0CAAAAAMDfEKQAAAAAwBBBCgAAAAAMEaQAAAAAwBBBCgAAAAAMEaQAAAAAwBBBCgAAAAAMEaQAAAAAwBBBCgAAAAAMEaQAwE+NHTtWNputQt4rMTFRiYmJru1169bJZrNpyZIlFfL+/fv3V926dSvkvdx18uRJDRw4UFFRUbLZbBo2bJi3S6pQFdmPAOALCFIA4APmzp0rm83m+goKClJMTIw6d+6sadOm6cSJE+XyPgcOHNDYsWO1bdu2clmvPPlybWXxxBNPaO7cufrXv/6lV155Rf369fN2SQAAD7JZlmV5uwgAuNjNnTtX6enpGj9+vOLj4+V0OpWdna1169YpIyNDderU0fLly9W8eXPXa06fPq3Tp08rKCiozO+zadMm/f3vf9ecOXPUv3//Mr/u1KlTkqTAwEBJf5yRat++vRYvXqxbbrmlzOu4W5vT6VRhYaEcDke5vJcn/OMf/1BAQIA+/vhjb5fiFe70IwD4swBvFwAA+H+6du2q1q1bu7ZHjRqlNWvW6MYbb1T37t313XffqUqVKpKkgIAABQR49sd4Xl6egoODXQHKW+x2u1ffvywOHTqkyy67zNtleE1F9CMA+BIu7QMAH3f99ddr9OjR2rt3r1599VXXeEn3pGRkZKht27YKCwtTtWrV1KhRIz3yyCOS/jiL9Pe//12SlJ6e7rqMcO7cuZL+uA+qadOm2rx5s6677joFBwe7XvvXe6TOKCgo0COPPKKoqChVrVpV3bt3108//VRkTt26dUs8+/XnNc9VW0n3SOXm5ur+++9XbGysHA6HGjVqpKeeekp/vdDCZrNpyJAhWrZsmZo2bSqHw6HLL79cK1euLPkb/heHDh3SgAEDFBkZqaCgILVo0ULz5s1z7T9zv9ju3bv17rvvumrfs2dPqWueqWnx4sW67LLLVKVKFbVp00ZfffWVJOm///2v6tevr6CgICUmJhZb66OPPtI///lP1alTRw6HQ7GxsRo+fLh+++23InXXrl1biYmJRb4nmZmZqlq1qlJSUsp0/NIfZwTHjRunBg0aKCgoSDVr1lTbtm2VkZHhmlNSP57vcQKAL+N/HQGAH+jXr58eeeQRrVq1SnfeeWeJc7755hvdeOONat68ucaPHy+Hw6HMzEx98sknkqQmTZpo/PjxeuyxxzRo0CBde+21kqSrr77atcaRI0fUtWtX9e7dW7fddpsiIyPPWtd//vMf2Ww2jRw5UocOHdLUqVPVsWNHbdu2zXXmrCzKUtufWZal7t27a+3atRowYICuuOIKvf/++3rwwQf1888/65lnniky/+OPP9bSpUt1zz33KCQkRNOmTVOvXr20b98+1axZs9S6fvvtNyUmJiozM1NDhgxRfHy8Fi9erP79++vYsWO677771KRJE73yyisaPny4LrnkEt1///2SpNq1a5/1mD/66CMtX75cgwcPliRNnDhRN954ox566CHNnDlT99xzj3799VdNnjxZd9xxh9asWeN67eLFi5WXl6d//etfqlmzpj7//HNNnz5d+/fv1+LFiyVJERERev755/XPf/5T06dP19ChQ1VYWKj+/fsrJCREM2fOPMffyv8zduxYTZw4UQMHDtRVV12lnJwcbdq0SVu2bFFSUpLHjhMAfJoFAP
C6OXPmWJKsL774otQ51atXt6688krX9pgxY6w//xh/5plnLEnWL7/8UuoaX3zxhSXJmjNnTrF97dq1syRZs2bNKnFfu3btXNtr1661JFl/+9vfrJycHNf4G2+8YUmynn32WddYXFyclZaWds41z1ZbWlqaFRcX59petmyZJcmaMGFCkXm33HKLZbPZrMzMTNeYJCswMLDI2JdffmlJsqZPn17svf5s6tSpliTr1VdfdY2dOnXKatOmjVWtWrUixx4XF2fdcMMNZ13vzzU5HA5r9+7drrH//ve/liQrKiqqyLqjRo2yJBWZm5eXV2zNiRMnWjabzdq7d2+R8T59+ljBwcHWDz/8YD355JOWJGvZsmVlqvOMFi1anPPY/tqPlnX+xwkAvoxL+wDAT1SrVu2sT+8LCwuTJL311lsqLCx06z0cDofS09PLPP/2229XSEiIa/uWW25RdHS0VqxY4db7l9WKFStUuXJlDR06tMj4/fffL8uy9N577xUZ79ixo+rVq+fabt68uUJDQ7Vr165zvk9UVJT69OnjGrPb7Ro6dKhOnjypDz/80O1j6NChQ5HLFRMSEiRJvXr1KvI9PTP+51r/fLYvNzdXhw8f1tVXXy3LsrR169Yi7/Pcc8+pevXquuWWWzR69Gj169dPN910k1GtYWFh+uabb/Tjjz8avU46v+MEAF9GkAIAP3Hy5Mki//D8q5SUFF1zzTUaOHCgIiMj1bt3b73xxhtGoepvf/ub0YMlGjRoUGTbZrOpfv36Hr/XZe/evYqJiSn2/WjSpIlr/5/VqVOn2Bo1atTQr7/+es73adCggSpVKvrrsrT3MfHXmqpXry5Jio2NLXH8z7Xu27dP/fv3V3h4uKpVq6batWurXbt2kqTjx48XeX14eLimTZum7du3q3r16po2bZpxrePHj9exY8fUsGFDNWvWTA8++KC2b99epteez3ECgC8jSAGAH9i/f7+OHz+u+vXrlzqnSpUqWr9+vVavXq1+/fpp+/btSklJUVJSkgoKCsr0Pib3NZVVaR/SWtaaykPlypVLHLe8+AkgpdV0rloLCgqUlJSkd999VyNHjtSyZcuUkZHhejBHScH5/fffl/RHSNm/f79xrdddd5127typ2bNnq2nTpnrppZfUsmVLvfTSS+d8rbvHCQC+jiAFAH7glVdekSR17tz5rPMqVaqkDh06aMqUKfr222/1n//8R2vWrNHatWsllR5q3PXXS70sy1JmZmaRS7lq1KihY8eOFXvtX8/mmNQWFxenAwcOFLvU8fvvv3ftLw9xcXH68ccfi4WT8n4fE1999ZV++OEHPf300xo5cqRuuukmdezYUTExMSXOX7lypV566SU99NBDql27ttLS0nT69Gnj9w0PD1d6erpef/11/fTTT2revLnGjh17nkcDAP6LIAUAPm7NmjV6/PHHFR8fr759+5Y67+jRo8XGrrjiCklSfn6+JKlq1aqSVGKwccf8+fOLhJklS5YoKytLXbt2dY3Vq1dPGzdudH2oryS98847xR6TblJbt27dVFBQoOeee67I+DPPPCObzVbk/c9Ht27dlJ2drUWLFrnGTp8+renTp6tatWquy+kq0pkzOX8+c2NZlp599tlic48dO+Z60t4TTzyhl156SVu2bNETTzxh9J5Hjhwpsl2tWjXVr1/f1VcAcDHi8ecA4EPee+89ff/99zp9+rQOHjyoNWvWKCMjQ3FxcVq+fLmCgoJKfe348eO1fv163XDDDYqLi9OhQ4c0c+ZMXXLJJWrbtq2kP0JNWFiYZs2apZCQEFWtWlUJCQmKj493q97w8HC1bdtW6enpOnjwoKZOnar69esXeUT7wIEDtWTJEnXp0kW33nqrdu7cqVdffbXIwx9Ma0tOTlb79u316KOPas+ePWrRooVWrVqlt956S8OGDSu2trsGDRqk//73v+rfv782b96sunXrasmSJfrkk080derUs96z5imNGzdWvXr19MADD+
jnn39WaGio/ve//5V4b9F9992nI0eOaPXq1apcubK6dOmigQMHasKECbrpppvUokWLMr3nZZddpsTERLVq1Urh4eHatGmTlixZoiFDhpT34QGA3yBIAYAPeeyxxyRJgYGBCg8PV7NmzTR16lSlp6ef8x/t3bt31549ezR79mwdPnxYtWrVUrt27TRu3DjXjfx2u13z5s3TqFGjdPfdd+v06dOaM2eO20HqkUce0fbt2zVx4kSdOHFCHTp00MyZMxUcHOya07lzZz399NOaMmWKhg0bptatW+udd95xfd7SGSa1VapUScuXL9djjz2mRYsWac6cOapbt66efPLJYuuejypVqmjdunV6+OGHNW/ePOXk5KhRo0aaM2dOiR8yXBHsdrvefvttDR06VBMnTlRQUJB69OihIUOGFAlGy5cv1/z58/X000+rcePGrvEpU6YoIyNDaWlp+uKLL2S328/5nkOHDtXy5cu1atUq5efnKy4uThMmTNCDDz7okWMEAH9gs7irEwAAAACMcI8UAAAAABji0j4AAC5Sv/32W7HPnfqr8PBwo88WA4CLBUEKAICL1KJFi5Senn7WOWvXrlViYmLFFAQAfoR7pAAAuEhlZWXpm2++OeucVq1aqUaNGhVUEQD4D4IUAAAAABjiYRMAAAAAYIh7pCQVFhbqwIEDCgkJkc1m83Y5AAAAALzEsiydOHFCMTExqlSp9PNOBClJBw4cUGxsrLfLAAAAAOAjfvrpJ11yySWl7idISQoJCZH0xzcrNDTUq7U4nU6tWrVKnTp1KtOnzQP0DEzRMzBFz8AUPQMTvtYvOTk5io2NdWWE0hCkJNflfKGhoT4RpIKDgxUaGuoTjQTfR8/AFD0DU/QMTNEzMOGr/XKuW3542AQAAAAAGCJIAQAAAIAhghQAAAAAGCJIAQAAAIAhghQAAAAAGCJIAQAAAIAhghQAAAAAGCJIAQAAAIAhghQAAAAAGCJIAQAAAIAhghQAAAAAGCJIAQAAAIAhghQAAAAAGCJIAQAAAIAhghQAAAAAGCJIAQAAAIAhghQAAAAAGCJIAQAAAIAhghQAAAAAGArwdgEA4CnJyZ5d/+23Pbs+AADwXZyRAgAAAABDBCkAAAAAMESQAgAAAABDBCkAAAAAMESQAgAAAABDBCkAAAAAMESQAgAAAABDBCkAAAAAMESQAgAAAABDBCkAAAAAMESQAgAAAABDBCkAAAAAMESQAgAAAABDBCkAAAAAMESQAgAAAABDBCkAAAAAMESQAgAAAABDBCkAAAAAMESQAgAAAABDBCkAAAAAMESQAgAAAABDXg1S69evV3JysmJiYmSz2bRs2bIi+202W4lfTz75pGtO3bp1i+2fNGlSBR8JAAAAgIuJV4NUbm6uWrRooRkzZpS4Pysrq8jX7NmzZbPZ1KtXryLzxo8fX2TevffeWxHlAwAAALhIBXjzzbt27aquXbuWuj8qKqrI9ltvvaX27dvr0ksvLTIeEhJSbC4AAAAAeIpXg5SJgwcP6t1339W8efOK7Zs0aZIef/xx1alTR6mpqRo+fLgCAko/tPz8fOXn57u2c3JyJElOp1NOp7P8izdw5v29XQf8Bz1TOrvds+v767ecnoEpegam6BmY8LV+KWsdNsuyLA/XUiY2m01vvvmmbr755hL3T548WZMmTdKBAwcUFBTkGp8yZYpatmyp8PBwffrppxo1apTS09M1ZcqUUt9r7NixGjduXLHxBQsWKDg4+LyPBQAAAIB/ysvLU2pqqo4fP67Q0NBS5/lNkGrcuLGSkpI0ffr0s64ze/Zs3XXXXTp58qQcDkeJc0o6IxUbG6vDhw+f9ZtVEZxOpzIyMpSUlCS7p/93Oi4I9EzpUlI8u/6iRZ5d31PoGZiiZ2CKnoEJX+uXnJwc1apV65xByi8u7fvoo4+0Y8cOLSrDv1oSEhJ0+vRp7dmzR40aNSpxjsPhKDFk2e12n/jLk3yrFvgHeqY4T18h4O
/fbnoGpugZmKJnYMJX+qWsNfjF50i9/PLLatWqlVq0aHHOudu2bVOlSpUUERFRAZUBAAAAuBh59YzUyZMnlZmZ6drevXu3tm3bpvDwcNWpU0fSH6fWFi9erKeffrrY6zds2KDPPvtM7du3V0hIiDZs2KDhw4frtttuU40aNSrsOAAAAABcXLwapDZt2qT27du7tkeMGCFJSktL09y5cyVJCxculGVZ6tOnT7HXOxwOLVy4UGPHjlV+fr7i4+M1fPhw1zoAAAAA4AleDVKJiYk617MuBg0apEGDBpW4r2XLltq4caMnSgMAAACAUvnFPVIAAAAA4EsIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYCvF0AgItbcrK3KwAAADDn1TNS69evV3JysmJiYmSz2bRs2bIi+/v37y+bzVbkq0uXLkXmHD16VH379lVoaKjCwsI0YMAAnTx5sgKPAgAAAMDFxqtBKjc3Vy1atNCMGTNKndOlSxdlZWW5vl5//fUi+/v27atvvvlGGRkZeuedd7R+/XoNGjTI06UDAAAAuIh59dK+rl27qmvXrmed43A4FBUVVeK+7777TitXrtQXX3yh1q1bS5KmT5+ubt266amnnlJMTEy51wwAAAAAPn+P1Lp16xQREaEaNWro+uuv14QJE1SzZk1J0oYNGxQWFuYKUZLUsWNHVapUSZ999pl69OhR4pr5+fnKz893befk5EiSnE6nnE6nB4/m3M68v7frgP/w956x271dgfv89Fvu9z2DikfPwBQ9AxO+1i9lrcOng1SXLl3Us2dPxcfHa+fOnXrkkUfUtWtXbdiwQZUrV1Z2drYiIiKKvCYgIEDh4eHKzs4udd2JEydq3LhxxcZXrVql4ODgcj8Od2RkZHi7BPgZf+2ZtDRvV+C+FSu8XcH58deegffQMzBFz8CEr/RLXl5emeb5dJDq3bu368/NmjVT8+bNVa9ePa1bt04dOnRwe91Ro0ZpxIgRru2cnBzFxsaqU6dOCg0NPa+az5fT6VRGRoaSkpJk9+f/VY8K4+89k5Li7Qrct2iRtytwj7/3DCoePQNT9AxM+Fq/nLla7Vx8Okj91aWXXqpatWopMzNTHTp0UFRUlA4dOlRkzunTp3X06NFS76uS/rjvyuFwFBu32+0+8Zcn+VYt8A/+2jM+chbfLT17em7tt9/23Npn+GvPwHvoGZiiZ2DCV/qlrDX41Qfy7t+/X0eOHFF0dLQkqU2bNjp27Jg2b97smrNmzRoVFhYqISHBW2UCAAAAuMB59YzUyZMnlZmZ6drevXu3tm3bpvDwcIWHh2vcuHHq1auXoqKitHPnTj300EOqX7++OnfuLElq0qSJunTpojvvvFOzZs2S0+nUkCFD1Lt3b57YBwAAAMBjvHpGatOmTbryyit15ZVXSpJGjBihK6+8Uo899pgqV66s7du3q3v37mrYsKEGDBigVq1a6aOPPipyWd5rr72mxo0bq0OHDurWrZvatm2rF154wVuHBAAAAOAi4NUzUomJibIsq9T977///jnXCA8P14IFC8qzLAAAAAA4K7+6RwoAAA
AAfAFBCgAAAAAM+dXjzwHgYpGc7Lm17Xb//iBkAAB8AWekAAAAAMAQQQoAAAAADBGkAAAAAMAQQQoAAAAADBGkAAAAAMAQQQoAAAAADBGkAAAAAMAQQQoAAAAADBGkAAAAAMAQQQoAAAAADBGkAAAAAMAQQQoAAAAADBGkAAAAAMAQQQoAAAAADBGkAAAAAMAQQQoAAAAADBGkAAAAAMAQQQoAAAAADBGkAAAAAMAQQQoAAAAADBGkAAAAAMAQQQoAAAAADBGkAAAAAMAQQQoAAAAADBGkAAAAAMAQQQoAAAAADBGkAAAAAMAQQQoAAAAADBGkAAAAAMAQQQoAAAAADBGkAAAAAMAQQQoAAAAADAV4uwAAvi052dsVAAAA+B7OSAEAAACAIYIUAAAAABgiSAEAAACAIYIUAAAAABgiSAEAAACAIYIUAAAAABgiSAEAAACAIYIUAAAAABgiSAEAAACAIYIUAAAAABgiSAEAAACAIYIUAAAAABjyapBav369kpOTFRMTI5vNpmXLlrn2OZ1OjRw5Us2aNVPVqlUVExOj22+/XQcOHCiyRt26dWWz2Yp8TZo0qYKPBAAAAMDFxKtBKjc3Vy1atNCMGTOK7cvLy9OWLVs0evRobdmyRUuXLtWOHTvUvXv3YnPHjx+vrKws19e9995bEeUDAAAAuEgFePPNu3btqq5du5a4r3r16srIyCgy9txzz+mqq67Svn37VKdOHdd4SEiIoqKiPForAAAAAJzh1SBl6vjx47LZbAoLCysyPmnSJD3++OOqU6eOUlNTNXz4cAUElH5o+fn5ys/Pd23n5ORI+uNyQqfT6ZHay+rM+3u7DvgPT/eM3e6RZeFFdjs/Z2CG300wRc/AhK/1S1nrsFmWZXm4ljKx2Wx68803dfPNN5e4//fff9c111yjxo0b67XXXnONT5kyRS1btlR4eLg+/fRTjRo1Sunp6ZoyZUqp7zV27FiNGzeu2PiCBQsUHBx83scCAAAAwD/l5eUpNTVVx48fV2hoaKnz/CJIOZ1O9erVS/v379e6devOekCzZ8/WXXfdpZMnT8rhcJQ4p6QzUrGxsTp8+PBZ164ITqdTGRkZSkpKkp1TASgDT/dMSkq5Lwkvs9udSk3l5wzKjt9NMEXPwISv9UtOTo5q1ap1ziDl85f2OZ1O3Xrrrdq7d6/WrFlzzqCTkJCg06dPa8+ePWrUqFGJcxwOR4khy263+8RfnuRbtcA/eKpnfOQsOzyAnzMwRc/AFD0DE77SL2WtwaeD1JkQ9eOPP2rt2rWqWbPmOV+zbds2VapUSRERERVQIQAAAICLkVeD1MmTJ5WZmena3r17t7Zt26bw8HBFR0frlltu0ZYtW/TOO++ooKBA2dnZkqTw8HAFBgZqw4YN+uyzz9S+fXuFhIRow4YNGj58uG677TbVqFHDW4cFAAAA4ALn1SC1adMmtW/f3rU9YsQISVJaWprGjh2r5cuXS5KuuOKKIq9bu3atEhMT5XA4tHDhQo0dO1b5+fmKj4/X8OHDXesAAAAAgCd4NUglJibqbM+6ONdzMFq2bKmNGzeWd1kAAAAAcFaVvF0AAAAAAPgbghQAAAAAGCJIAQAAAIAhghQAAAAAGCJIAQAAAIAhghQAAAAAGCJIAQAAAIAhghQAAAAAGCJIAQAAAIAhghQAAAAAGCJIAQAAAIAhghQAAAAAGCJIAQAAAIAhghQAAAAAGCJIAQAAAIAhghQAAAAAGCJIAQAAAIAhghQAAAAAGCJIAQAAAIAhghQAAAAAGCJIAQAAAIAhghQAAAAAGCJIAQAAAIAhghQAAAAAGCJIAQAAAIAht4LUrl27yrsOAAAAAPAbbgWp+vXrq3379nr11Vf1+++/l3dNAAAAAODT3ApSW7ZsUfPmzTVixAhFRUXprrvu0ueff17etQEAAACAT3IrSF1xxRV69tlndeDAAc2ePVtZWVlq27atmjZtqilTpuiXX34p7zoBAAAAwG
ec18MmAgIC1LNnTy1evFj/93//p8zMTD3wwAOKjY3V7bffrqysrPKqEwAAAAB8xnkFqU2bNumee+5RdHS0pkyZogceeEA7d+5URkaGDhw4oJtuuqm86gQAAAAAnxHgzoumTJmiOXPmaMeOHerWrZvmz5+vbt26qVKlP3JZfHy85s6dq7p165ZnrQAAAADgE9wKUs8//7zuuOMO9e/fX9HR0SXOiYiI0Msvv3xexQEAAACAL3IrSP3444/nnBMYGKi0tDR3lgcAVICUFMnpLP913367/NcEAMDXuHWP1Jw5c7R48eJi44sXL9a8efPOuygAAAAA8GVuBamJEyeqVq1axcYjIiL0xBNPnHdRAAAAAODL3ApS+/btU3x8fLHxuLg47du377yLAgAAAABf5laQioiI0Pbt24uNf/nll6pZs+Z5FwUAAAAAvsytINWnTx8NHTpUa9euVUFBgQoKCrRmzRrdd9996t27d3nXCAAAAAA+xa2n9j3++OPas2ePOnTooICAP5YoLCzU7bffzj1SAAAAAC54bgWpwMBALVq0SI8//ri+/PJLValSRc2aNVNcXFx51wcAAAAAPsetIHVGw4YN1bBhw/KqBQAAAAD8gltBqqCgQHPnztUHH3ygQ4cOqbCwsMj+NWvWlEtxAAAAAOCL3ApS9913n+bOnasbbrhBTZs2lc1mK++6AAAAAMBnuRWkFi5cqDfeeEPdunUr73oAAAAAwOe59fjzwMBA1a9fv7xrAQAAAAC/4FaQuv/++/Xss8/KsqzyrgcAAAAAfJ5bl/Z9/PHHWrt2rd577z1dfvnlstvtRfYvXbq0XIoDAAAAAF/k1hmpsLAw9ejRQ+3atVOtWrVUvXr1Il9ltX79eiUnJysmJkY2m03Lli0rst+yLD322GOKjo5WlSpV1LFjR/34449F5hw9elR9+/ZVaGiowsLCNGDAAJ08edKdwwIAAACAMnHrjNScOXPK5c1zc3PVokUL3XHHHerZs2ex/ZMnT9a0adM0b948xcfHa/To0ercubO+/fZbBQUFSZL69u2rrKwsZWRkyOl0Kj09XYMGDdKCBQvKpUYAAAAA+Cu3P5D39OnTWrdunXbu3KnU1FSFhITowIEDCg0NVbVq1cq0RteuXdW1a9cS91mWpalTp+rf//63brrpJknS/PnzFRkZqWXLlql379767rvvtHLlSn3xxRdq3bq1JGn69Onq1q2bnnrqKcXExLh7eAAAAABQKreC1N69e9WlSxft27dP+fn5SkpKUkhIiP7v//5P+fn5mjVr1nkXtnv3bmVnZ6tjx46userVqyshIUEbNmxQ7969tWHDBoWFhblClCR17NhRlSpV0meffaYePXqUuHZ+fr7y8/Nd2zk5OZIkp9Mpp9N53rWfjzPv7+064D883TN/uQUSFwC73Vnkv+WNH18XHn43wRQ9AxO+1i9lrcPtD+Rt3bq1vvzyS9WsWdM13qNHD915553uLFlMdna2JCkyMrLIeGRkpGtfdna2IiIiiuwPCAhQeHi4a05JJk6cqHHjxhUbX7VqlYKDg8+39HKRkZHh7RLgZzzVM2lpHlkWPiA11TM9s2KFR5aFD+B3E0zRMzDhK/2Sl5dXpnluBamPPvpIn376qQIDA4uM161bVz///LM7S1aoUaNGacSIEa7tnJwcxcbGqlOnTgoNDfViZX8k4IyMDCUlJRV7GiJQEk/3TEpKuS8JL7PbnUpNzdCCBUlyOsu/ZxYtKvcl4WX8boIpegYmfK1fzlytdi5uBanCwkIVFBQUG9+/f79CQkLcWbKYqKgoSdLBgwcVHR3tGj948KCuuOIK15xDhw4Ved3p06d19OhR1+tL4nA45HA4io3b7Xaf+MuTfKsW+AdP9YyPnGWHBziddo8EKX50Xbj43QRT9AxM+Eq/lLUGtx5/3qlTJ02dOtW1bbPZdPLkSY0ZM0bdunVzZ8li4uPjFRUVpQ8++MA1lpOTo88++0xt2rSRJL
Vp00bHjh3T5s2bXXPWrFmjwsJCJSQklEsdAAAAAPBXbp2Revrpp9W5c2dddtll+v3335Wamqoff/xRtWrV0uuvv17mdU6ePKnMzEzX9u7du7Vt2zaFh4erTp06GjZsmCZMmKAGDRq4Hn8eExOjm2++WZLUpEkTdenSRXfeeadmzZolp9OpIUOGqHfv3jyxDwAAAIDHuBWkLrnkEn355ZdauHChtm/frpMnT2rAgAHq27evqlSpUuZ1Nm3apPbt27u2z9y3lJaWprlz5+qhhx5Sbm6uBg0apGPHjqlt27ZauXKl6zOkJOm1117TkCFD1KFDB1WqVEm9evXStGnT3DksAAAAACgTtz9HKiAgQLfddtt5vXliYqIsyyp1v81m0/jx4zV+/PhS54SHh/PhuwAAAAAqlFtBav78+Wfdf/vtt7tVDAAAAAD4A7c/R+rPnE6n8vLyFBgYqODgYIIUAAAAgAuaW0/t+/XXX4t8nTx5Ujt27FDbtm2NHjYBAAAAAP7IrSBVkgYNGmjSpEnFzlYBAAAAwIWm3IKU9McDKA4cOFCeSwIAAACAz3HrHqnly5cX2bYsS1lZWXruued0zTXXlEthAAAAAOCr3ApSZz4Q9wybzabatWvr+uuv19NPP10edQEAAACAz3IrSBUWFpZ3HQAAAADgN8r1HikAAAAAuBi4dUZqxIgRZZ47ZcoUd94CAAAAAHyWW0Fq69at2rp1q5xOpxo1aiRJ+uGHH1S5cmW1bNnSNc9ms5VPlQAAAADgQ9wKUsnJyQoJCdG8efNUo0YNSX98SG96erquvfZa3X///eVaJAAAAAD4ErfukXr66ac1ceJEV4iSpBo1amjChAk8tQ8AAADABc+tIJWTk6Nffvml2Pgvv/yiEydOnHdRAAAAAODL3ApSPXr0UHp6upYuXar9+/dr//79+t///qcBAwaoZ8+e5V0jAAAAAPgUt+6RmjVrlh544AGlpqbK6XT+sVBAgAYMGKAnn3yyXAsEAAAAAF/jVpAKDg7WzJkz9eSTT2rnzp2SpHr16qlq1arlWhwAAAAA+CK3gtQZWVlZysrK0nXXXacqVarIsiweeQ54SUqK9P+fIAYAAICHuXWP1JEjR9ShQwc1bNhQ3bp1U1ZWliRpwIABPPocAAAAwAXPrSA1fPhw2e127du3T8HBwa7xlJQUrVy5styKAwAAAABf5NalfatWrdL777+vSy65pMh4gwYNtHfv3nIpDAAAAAB8lVtnpHJzc4uciTrj6NGjcjgc510UAAAAAPgyt4LUtddeq/nz57u2bTabCgsLNXnyZLVv377cigMAAAAAX+TWpX2TJ09Whw4dtGnTJp06dUoPPfSQvvnmGx09elSffPJJedcIAAAAAD7FrTNSTZs21Q8//KC2bdvqpptuUm5urnr27KmtW7eqXr165V0jAAAAAPgU4zNSTqdTXbp00axZs/Too496oiYAAAAA8GnGZ6Tsdru2b9/uiVoAAAAAwC+4dWnfbbfdppdffrm8awEAAAAAv+DWwyZOnz6t2bNna/Xq1WrVqpWqVq1aZP+UKVPKpTgAAAAA8EVGQWrXrl2qW7euvv76a7Vs2VKS9MMPPxSZY7PZyq86AAAAAPBBRkGqQYMGysrK0tq1ayVJKSkpmjZtmiIjIz1SHAAAAAD4IqN7pCzLKrL93nvvKTc3t1wLAgAAAABf59bDJs74a7ACAAAAgIuBUZCy2WzF7oHinigAAAAAFxuje6Qsy1L//v3lcDgkSb///rvuvvvuYk/tW7p0aflVCAAAAAA+xihIpaWlFdm+7bbbyrUYAAAAAPAHRkFqzpw5nqoDAAAAAPzGeT1sAgAAAAAuRgQpAAAAADBEkAIAAAAAQwQpAAAAADBEkAIAAAAAQwQpAAAAADBEkAIAAAAAQwQpAAAAADBEkAIAAAAAQwQpAAAAADBEkAIAAAAAQz4fpOrWrSubzVbsa/DgwZKkxMTEYvvuvvtuL1cNAAAA4E
IW4O0CzuWLL75QQUGBa/vrr79WUlKS/vnPf7rG7rzzTo0fP961HRwcXKE1AgAAALi4+HyQql27dpHtSZMmqV69emrXrp1rLDg4WFFRURVdGgAAAICLlM8HqT87deqUXn31VY0YMUI2m801/tprr+nVV19VVFSUkpOTNXr06LOelcrPz1d+fr5rOycnR5LkdDrldDo9dwBlcOb9vV0H/MeZXrHb6RmUzZle8VTP8OPrwsPvJpiiZ2DC1/qlrHXYLMuyPFxLuXnjjTeUmpqqffv2KSYmRpL0wgsvKC4uTjExMdq+fbtGjhypq666SkuXLi11nbFjx2rcuHHFxhcsWMBlgQAAAMBFLC8vT6mpqTp+/LhCQ0NLnedXQapz584KDAzU22+/XeqcNWvWqEOHDsrMzFS9evVKnFPSGanY2FgdPnz4rN+siuB0OpWRkaGkpCTZ7Xav1gL/cKZnFixIktNJz+Dc7HanUlM91zOLFpX7kvAyfjfBFD0DE77WLzk5OapVq9Y5g5TfXNq3d+9erV69+qxnmiQpISFBks4apBwOhxwOR7Fxu93uE395km/VAv/gdNoJUjDiqZ7hR9eFi99NMEXPwISv9EtZa/CbIDVnzhxFRETohhtuOOu8bdu2SZKio6MroCoAwF8lJ3t2/bNclAAAQIXxiyBVWFioOXPmKC0tTQEB/6/knTt3asGCBerWrZtq1qyp7du3a/jw4bruuuvUvHlzL1YMAAAA4ELmF0Fq9erV2rdvn+64444i44GBgVq9erWmTp2q3NxcxcbGqlevXvr3v//tpUoBAAAAXAz8Ikh16tRJJT0TIzY2Vh9++KEXKgIAAABwMavk7QIAAAAAwN8QpAAAAADAEEEKAAAAAAwRpAAAAADAEEEKAAAAAAwRpAAAAADAEEEKAAAAAAwRpAAAAADAEEEKAAAAAAwRpAAAAADAEEEKAAAAAAwRpAAAAADAEEEKAAAAAAwRpAAAAADAEEEKAAAAAAwRpAAAAADAEEEKAAAAAAwRpAAAAADAEEEKAAAAAAwRpAAAAADAEEEKAAAAAAwRpAAAAADAEEEKAAAAAAwRpAAAAADAEEEKAAAAAAwRpAAAAADAEEEKAAAAAAwRpAAAAADAEEEKAAAAAAwRpAAAAADAEEEKAAAAAAwRpAAAAADAEEEKAAAAAAwRpAAAAADAEEEKAAAAAAwRpAAAAADAEEEKAAAAAAwRpAAAAADAUIC3CwAAwERysufWfvttz60NALiwcEYKAAAAAAwRpAAAAADAEEEKAAAAAAwRpAAAAADAEEEKAAAAAAwRpAAAAADAEI8/ByqAJx/XbLdLaWmeWx8AAADFcUYKAAAAAAwRpAAAAADAEEEKAAAAAAz5dJAaO3asbDZbka/GjRu79v/+++8aPHiwatasqWrVqqlXr146ePCgFysGAAAAcDHw6SAlSZdffrmysrJcXx9//LFr3/Dhw/X2229r8eLF+vDDD3XgwAH17NnTi9UCAAAAuBj4/FP7AgICFBUVVWz8+PHjevnll7VgwQJdf/31kqQ5c+aoSZMm2rhxo/7xj3+UumZ+fr7y8/Nd2zk5OZIkp9Mpp9NZzkdg5sz7e7sOlC+73ZNrO4v8FzgXeqZ0/OgtGb+bYIqegQlf65ey1mGzLMvycC1uGzt2rJ588klVr15dQUFBatOmjSZOnKg6depozZo16tChg3799VeFhYW5XhMXF6dhw4Zp+PDhZ1133LhxxcYXLFig4OBgTxwKAAAAAD+Ql5en1NRUHT9+XKGhoaXO8+kzUgkJCZo7d64aNWqkrKwsjRs3Ttdee62+/vprZWdnKzAwsEiIkqTIyEhlZ2efdd1Ro0ZpxIgRru2cnBzFxsaqU6dOZ/1mVQSn06mMjAwlJSXJ7snTGKhQKSmeW9tudyo1NUMLFiTJ6aRncG70TOkWLfJ2Bb6J300wRc/AhK/1y5mr1c7Fp4NU165dXX9u3ry5EhISFBcXpzfeeENVql
Rxe12HwyGHw1Fs3G63+8RfnuRbteD8VcSZaqfTzj+KYYSeKY4fu2fH7yaYomdgwlf6paw1+PzDJv4sLCxMDRs2VGZmpqKionTq1CkdO3asyJyDBw+WeE8VAAAAAJQXvwpSJ0+e1M6dOxUdHa1WrVrJbrfrgw8+cO3fsWOH9u3bpzZt2nixSgAAAAAXOp++tO+BBx5QcnKy4uLidODAAY0ZM0aVK1dWnz59VL16dQ0YMEAjRoxQeHi4QkNDde+996pNmzZnfWIfAAAAAJwvnw5S+/fvV58+fXTkyBHVrl1bbdu21caNG1W7dm1J0jPPPKNKlSqpV69eys/PV+fOnTVz5kwvVw0AAADgQufTQWrhwoVn3R8UFKQZM2ZoxowZFVQRAAAAAPjZPVIAAAAA4AsIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgKMDbBQAA4CuSk71dgfveftvbFQDAxYUzUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgyKeD1MSJE/X3v/9dISEhioiI0M0336wdO3YUmZOYmCibzVbk6+677/ZSxQAAAAAuBj4dpD788EMNHjxYGzduVEZGhpxOpzp16qTc3Nwi8+68805lZWW5viZPnuyligEAAABcDAK8XcDZrFy5ssj23LlzFRERoc2bN+u6665zjQcHBysqKqrM6+bn5ys/P9+1nZOTI0lyOp1yOp3nWfX5OfP+3q4D5ctu9+TaziL/Bc6FnrkwefLXBr+bYIqegQlf65ey1mGzLMvycC3lJjMzUw0aNNBXX32lpk2bSvrj0r5vvvlGlmUpKipKycnJGj16tIKDg0tdZ+zYsRo3blyx8QULFpz1dQAAAAAubHl5eUpNTdXx48cVGhpa6jy/CVKFhYXq3r27jh07po8//tg1/sILLyguLk4xMTHavn27Ro4cqauuukpLly4tda2SzkjFxsbq8OHDZ/1mVQSn06mMjAwlJSXJ7snTGKhQKSmeW9tudyo1NUMLFiTJ6aRncG70zIVp0SLPrc3vJpiiZ2DC1/olJydHtWrVOmeQ8ulL+/5s8ODB+vrrr4uEKEkaNGiQ68/NmjVTdHS0OnTooJ07d6pevXolruVwOORwOIqN2+12n/jLk3yrFpy/ijhT7XTa+UcxjNAzF5aK+JXB7yaYomdgwlf6paw1+EWQGjJkiN555x2tX79el1xyyVnnJiQkSPrjMsDSghQAABea5GTPrW23S2lpnlsfAPyRTwcpy7J077336s0339S6desUHx9/ztds27ZNkhQdHe3h6gAAAABcrHw6SA0ePFgLFizQW2+9pZCQEGVnZ0uSqlevripVqmjnzp1asGCBunXrppo1a2r79u0aPny4rrvuOjVv3tzL1QMAAAC4UPl0kHr++ecl/fFkvj+bM2eO+vfvr8DAQK1evVpTp05Vbm6uYmNj1atXL/373//2QrUAAAAALhY+HaTO9UDB2NhYffjhhxVUDQAAAAD8oZK3CwAAAAAAf0OQAgAAAABDPn1pH1CRPPnoYAAAAFxYOCMFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUg
AAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYCvF0AAACAJyUne27tt9/23NoAfBtnpAAAAADAEEEKAAAAAAxxaR8AAPAqT156BwCewhkpAAAAADDEGSkAAFAmKSmS0+ntKgDAN3BGCgAAAAAMEaQAAAAAwBBBCgAAAAAMEaQAAAAAwBAPm4Df4PG4AAAA8BWckQIAAAAAQwQpAAAAADBEkAIAAAAAQwQpAAAAADBEkAIAAAAAQwQpAAAAADBEkAIAAAAAQwQpAAAAADDEB/JeZDz9obZvv+3Z9QEAAABfwBkpAAAAADBEkAIAAAAAQ1zah3Ll6UsHAQAAAF/AGSkAAAAAMESQAgAAAABDBCkAAAAAMMQ9Uj4qJUVyOr1dBQAAOBt//lgRT9Zut0tpaZ5bH/AFnJECAAAAAEMEKQAAAAAwxKV9AAAAPoqPFal4XK5ZOk/W7o8umDNSM2bMUN26dRUUFKSEhAR9/vnn3i4JAAAAwAXqgjgjtWjRIo0YMUKzZs1SQkKCpk6dqs6dO2vHjh2KiIjwdnkAAACAJP8+y+ip2v314SQXxBmpKVOm6M4771R6erouu+wyzZo1S8HBwZo9e7a3SwMAAABwAfL7M1KnTp3S5s2bNWrUKNdYpUqV1LFjR23YsKHE1+Tn5ys/P9+1ffz4cUnS0aNH5fTyM8edTqfy8vIkHZFk92ot8Bf0DEzRMzBFz8DUHz1z5MgR2e30DM7Ft/rlxIkTkiTLss46z++D1OHDh1VQUKDIyMgi45GRkfr+++9LfM3EiRM1bty4YuPx8fEeqRHwtDff9HYF8Df0DEzRMzBFz8CEL/bLiRMnVL169VL3+32QcseoUaM0YsQI13ZhYaGOHj2qmjVrymazebEyKScnR7Gxsfrpp58UGhrq1VrgH+gZmKJnYIqegSl6BiZ8rV8sy9KJEycUExNz1nl+H6Rq1aqlypUr6+DBg0XGDx48qKioqBJf43A45HA4ioyFhYV5qkS3hIaG+kQjwX/QMzBFz8AUPQNT9AxM+FK/nO1M1Bl+/7CJwMBAtWrVSh988IFrrLCwUB988IHatGnjxcoAAAAAXKj8/oyUJI0YMUJpaWlq3bq1rrrqKk2dOlW5ublKT0/3dmkAAAAALkAXRJBKSUnRL7/8oscee0zZ2dm64oortHLlymIPoPAHDodDY8aMKXbpIVAaegam6BmYomdgip6BCX/tF5t1ruf6AQAAAACK8Pt7pAAAAACgohGkAAAAAMAQQQoAAAAADBGkAAAAAMAQQcoLZsyYobp16yooKEgJCQn6/PPPzzp/8eLFaty4sYKCgtSsWTOtWLGigiqFrzDpmRdffFHXXnutatSooRo1aqhjx47n7DFceEx/zpyxcOFC2Ww23XzzzZ4tED7FtF+OHTumwYMHKzo6Wg6HQw0bNuR300XGtGemTp2qRo0aqUqVKoqNjdXw4cP1+++/V1C18Lb169crOTlZMTExstlsWrZs2Tlfs27dOrVs2VIOh0P169fX3LlzPV6nKYJUBVu0aJFGjBihMWPGaMuWLWrRooU6d+6sQ4cOlTj/008/VZ8+fTRgwABt3bpVN998s26++WZ9/fXXFVw5vMW0Z9atW6c+ffpo7dq12rBhg2JjY9WpUyf9/PPPFVw5vMW0Z87Ys2ePHnjgAV177bUVVCl8gWm/nDp1SklJSdqzZ4+WLFmiHTt26MUXX9Tf/va3Cq4c3mLaMwsWLNDDDz+sMWPG6LvvvtPLL7+sRYsW6ZFHHqngyuEtubm5atGihWbMmFGm+bt379YNN9yg9u3ba9u2bRo2bJgGDhyo999/38OVGrJQoa666ipr8ODBru2CggIrJibGmjhxYonzb731VuuGG24oMpaQkGDdddddHq0TvsO0Z/7q9OnTVkhIiDVv3jxPlQgf407PnD592rr66qutl156yUpLS7NuuummCqgUvs
C0X55//nnr0ksvtU6dOlVRJcLHmPbM4MGDreuvv77I2IgRI6xrrrnGo3XCN0my3nzzzbPOeeihh6zLL7+8yFhKSorVuXNnD1ZmjjNSFejUqVPavHmzOnbs6BqrVKmSOnbsqA0bNpT4mg0bNhSZL0mdO3cudT4uLO70zF/l5eXJ6XQqPDzcU2XCh7jbM+PHj1dERIQGDBhQEWXCR7jTL8uXL1ebNm00ePBgRUZGqmnTpnriiSdUUFBQUWXDi9zpmauvvlqbN292Xf63a9curVixQt26dauQmuF//OXfvwHeLuBicvjwYRUUFCgyMrLIeGRkpL7//vsSX5OdnV3i/OzsbI/VCd/hTs/81ciRIxUTE1PsBxIuTO70zMcff6yXX35Z27Ztq4AK4Uvc6Zddu3ZpzZo16tu3r1asWKHMzEzdc889cjqdGjNmTEWUDS9yp2dSU1N1+PBhtW3bVpZl6fTp07r77ru5tA+lKu3fvzk5Ofrtt99UpUoVL1VWFGekgAvYpEmTtHDhQr355psKCgrydjnwQSdOnFC/fv304osvqlatWt4uB36gsLBQEREReuGFF9SqVSulpKTo0Ucf1axZs7xdGnzUunXr9MQTT2jmzJnasmWLli5dqnfffVePP/64t0sDzgtnpCpQrVq1VLlyZR08eLDI+MGDBxUVFVXia6Kioozm48LiTs+c8dRTT2nSpElavXq1mjdv7sky4UNMe2bnzp3as2ePkpOTXWOFhYWSpICAAO3YsUP16tXzbNHwGnd+xkRHR8tut6ty5cqusSZNmig7O1unTp1SYGCgR2uGd7nTM6NHj1a/fv00cOBASVKzZs2Um5urQYMG6dFHH1WlSvx/fRRV2r9/Q0NDfeZslMQZqQoVGBioVq1a6YMPPnCNFRYW6oMPPlCbNm1KfE2bNm2KzJekjIyMUufjwuJOz0jS5MmT9fjjj2vlypVq3bp1RZQKH2HaM40bN9ZXX32lbdu2ub66d+/uelJSbGxsRZaPCubOz5hrrrlGmZmZrsAtST/88IOio6MJURcBd3omLy+vWFg6E8Qty/JcsfBbfvPvX28/7eJis3DhQsvhcFhz5861vv32W2vQoEFWWFiYlZ2dbVmWZfXr1896+OGHXfM/+eQTKyAgwHrqqaes7777zhozZoxlt9utr776yluHgApm2jOTJk2yAgMDrSVLllhZWVmurxMnTnjrEFDBTHvmr3hq38XFtF/27dtnhYSEWEOGDLF27NhhvfPOO1ZERIQ1YcIEbx0CKphpz4wZM8YKCQmxXn/9dWvXrl3WqlWrrHr16lm33nqrtw4BFezEiRPW1q1bra1bt1qSrClTplhbt2619u7da1mWZT388MNWv379XPN37dplBQcHWw8++KD13XffWTNmzLAqV65srVy50luHUCKClBdMnz7dqlOnjhUYGGhdddVV1saNG1372rVrZ6WlpRWZ/8Ybb1gNGza0AgMDrcsvv9x69913K7hieJtJz8TFxVmSin2NGTOm4guH15j+nPkzgtTFx7RfPv30UyshIcFyOBzWpZdeav3nP/+xTp8+XcFVw5tMesbpdFpjx4616tWrZwUFBVmxsbHWPffcY/36668VXzi8Yu3atSX+2+RMn6SlpVnt2rUr9porrrjCCgwMtC699FJrzpw5FV73udgsi3OqAAAAAGCCe6QAAAAAwBBBCgAAAAAMEaQAAAAAwBBBCgAAAAAMEaQAAAAAwBBBCgAAAAAMEaQAAAAAwBBBCgAAAAAMEaQAADgP69atk81m07Fjx7xdCgCgAtksy7K8XQQAAP7q1KlTOnr0qCIjI2Wz2bxdDgCgghCkAAAAAMAQl/YBAHxOYmKi7r33Xg0bNkw1atRQZGSkXnzxReXm5io9PV0hISGqX7++3nvvPUlSQUGBBgwYoPj4eFWpUkWNGjXSs88+61rv999/1+WXX65Bgwa5xnbu3KmQkBDNnj37nPXs3btXycnJqlGjhqpWra
rLL79cK1askFT80r65c+cqLCxM77zzjho1aqTg4GDdcsstysvL07x581S3bl3VqFFDQ4cOVUFBQTl+1wAAFSnA2wUAAFCSefPm6aGHHtLnn3+uRYsW6V//+pfefPNN9ejRQ4888oieeeYZ9evXT/v27ZPdbtcll1yixYsXq2bNmvr00081aNAgRUdH69Zbb1VQUJBee+01JSQk6IYbbtCNN96o2267TUlJSbrjjjvOWcvgwYN16tQprV+/XlWrVtW3336ratWqlTo/Ly9P06ZN08KFC3XixAn17NlTPXr0UFhYmFasWKFdu3apV69euuaaa5SSklKe3zYAQAXh0j4AgM9JTExUQUGBPvroI0l/nHGqXr26evbsqfnz50uSsrOzFR0drQ0bNugf//hHsTWGDBmi7OxsLVmyxDX25JNPavLkyerdu7f+97//6auvvlLNmjXPWU/z5s3Vq1cvjRkzpti+devWqX379vr1118VFhamuXPnKj09XZmZmapXr54k6e6779Yrr7yigwcPugJYly5dVLduXc2aNcv8GwQA8Dou7QMA+KTmzZu7/ly5cmXVrFlTzZo1c41FRkZKkg4dOiRJmjFjhlq1aqXatWurWrVqeuGFF7Rv374ia95///1q2LChnnvuOc2ePbtMIUqShg4dqgkTJuiaa67RmDFjtH379rPODw4OdoWoM7XWrVu3yFmsyMhIV+0AAP9DkAIA+CS73V5k22azFRk784S8wsJCLVy4UA888IAGDBigVatWadu2bUpPT9epU6eKrHHo0CH98MMPqly5sn788ccy1zJw4EDt2rVL/fr101dffaXWrVtr+vTpbtd+ZqywsLDMNQAAfAtBCgDg9z755BNdffXVuueee3TllVeqfv362rlzZ7F5d9xxh5o1a6Z58+Zp5MiR+u6778r8HrGxsbr77ru1dOlS3X///XrxxRfL8xAAAH6Gh00AAPxegwYNNH/+fL3//vuKj4/XK6+8oi+++ELx8fGuOTNmzNCGDRu0fft2xcbG6t1331Xfvn21ceNGBQYGnnX9YcOGqWvXrmrYsKF+/fVXrV27Vk2aNPH0YQEAfBhnpAAAfu+uu+5Sz549lZKSooSEBB05ckT33HOPa//333+vBx98UDNnzlRsbKwkaebMmTp8+LBGjx59zvULCgo0ePBgNWnSRF26dFHDhg01c+ZMjx0PAMD38dQ+AAAAADDEGSkAAAAAMESQAgBc9Lp27apq1aqV+PXEE094uzwAgA/i0j4AwEXv559/1m+//VbivvDwcIWHh1dwRQAAX0eQAgAAAABDXNoHAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIYIUgAAAABgiCAFAAAAAIb+P6TbFtYOJrPcAAAAAElFTkSuQmCC",
|
| 348 |
+
"text/plain": [
|
| 349 |
+
"<Figure size 1000x600 with 1 Axes>"
|
| 350 |
+
]
|
| 351 |
+
},
|
| 352 |
+
"metadata": {},
|
| 353 |
+
"output_type": "display_data"
|
| 354 |
+
}
|
| 355 |
+
],
|
| 356 |
+
"source": [
|
| 357 |
+
"import matplotlib.pyplot as plt\n",
|
| 358 |
+
"import seaborn as sns\n",
|
| 359 |
+
"# Plot a histogram of the 'max_sim_normalized' column\n",
|
| 360 |
+
"plt.figure(figsize=(10, 6))\n",
|
| 361 |
+
"plt.hist(result_df['max_sim_normalized'], bins=30, color='blue', alpha=0.7)\n",
|
| 362 |
+
"plt.title('Distribution of max_sim')\n",
|
| 363 |
+
"plt.xlabel('max_sim')\n",
|
| 364 |
+
"plt.ylabel('Frequency')\n",
|
| 365 |
+
"plt.grid(True)\n",
|
| 366 |
+
"plt.show()\n"
|
| 367 |
+
]
|
| 368 |
+
},
|
| 369 |
+
{
|
| 370 |
+
"cell_type": "code",
|
| 371 |
+
"execution_count": 139,
|
| 372 |
+
"metadata": {},
|
| 373 |
+
"outputs": [],
|
| 374 |
+
"source": [
|
| 375 |
+
"result_final = result_df[['Indicator Name','ID','framework','max_sim_normalized','top1name', 'top2name', 'top3name', 'top4name', 'top5name', 'top1id',\n",
|
| 376 |
+
" 'top2id', 'top3id', 'top4id', 'top5id']]"
|
| 377 |
+
]
|
| 378 |
+
},
|
| 379 |
+
{
|
| 380 |
+
"cell_type": "code",
|
| 381 |
+
"execution_count": 140,
|
| 382 |
+
"metadata": {},
|
| 383 |
+
"outputs": [
|
| 384 |
+
{
|
| 385 |
+
"name": "stderr",
|
| 386 |
+
"output_type": "stream",
|
| 387 |
+
"text": [
|
| 388 |
+
"/var/folders/0k/7w7z520532qclyc34vntz5ym0000gn/T/ipykernel_94618/671129894.py:9: SettingWithCopyWarning: \n",
|
| 389 |
+
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
| 390 |
+
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
| 391 |
+
"\n",
|
| 392 |
+
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
| 393 |
+
" result_final['top1framework'] = result_final['top1id'].apply(map_framework)\n",
|
| 394 |
+
"/var/folders/0k/7w7z520532qclyc34vntz5ym0000gn/T/ipykernel_94618/671129894.py:10: SettingWithCopyWarning: \n",
|
| 395 |
+
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
| 396 |
+
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
| 397 |
+
"\n",
|
| 398 |
+
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
| 399 |
+
" result_final['top2framework'] = result_final['top2id'].apply(map_framework)\n",
|
| 400 |
+
"/var/folders/0k/7w7z520532qclyc34vntz5ym0000gn/T/ipykernel_94618/671129894.py:11: SettingWithCopyWarning: \n",
|
| 401 |
+
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
| 402 |
+
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
| 403 |
+
"\n",
|
| 404 |
+
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
| 405 |
+
" result_final['top3framework'] = result_final['top3id'].apply(map_framework)\n",
|
| 406 |
+
"/var/folders/0k/7w7z520532qclyc34vntz5ym0000gn/T/ipykernel_94618/671129894.py:12: SettingWithCopyWarning: \n",
|
| 407 |
+
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
| 408 |
+
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
| 409 |
+
"\n",
|
| 410 |
+
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
| 411 |
+
" result_final['top4framework'] = result_final['top4id'].apply(map_framework)\n",
|
| 412 |
+
"/var/folders/0k/7w7z520532qclyc34vntz5ym0000gn/T/ipykernel_94618/671129894.py:13: SettingWithCopyWarning: \n",
|
| 413 |
+
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
| 414 |
+
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
| 415 |
+
"\n",
|
| 416 |
+
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
| 417 |
+
" result_final['top5framework'] = result_final['top5id'].apply(map_framework)\n"
|
| 418 |
+
]
|
| 419 |
+
}
|
| 420 |
+
],
|
| 421 |
+
"source": [
|
| 422 |
+
"# Create a mapping from ID to Framework\n",
|
| 423 |
+
"id_to_framework = indicators_df.set_index('ID')['Framework'].to_dict()\n",
|
| 424 |
+
"\n",
|
| 425 |
+
"# Function to map ID to Framework\n",
|
| 426 |
+
"def map_framework(id):\n",
|
| 427 |
+
" return id_to_framework.get(id, np.nan)\n",
|
| 428 |
+
"\n",
|
| 429 |
+
"# Add framework information for top1id to top5id\n",
|
| 430 |
+
"result_final['top1framework'] = result_final['top1id'].apply(map_framework)\n",
|
| 431 |
+
"result_final['top2framework'] = result_final['top2id'].apply(map_framework)\n",
|
| 432 |
+
"result_final['top3framework'] = result_final['top3id'].apply(map_framework)\n",
|
| 433 |
+
"result_final['top4framework'] = result_final['top4id'].apply(map_framework)\n",
|
| 434 |
+
"result_final['top5framework'] = result_final['top5id'].apply(map_framework)"
|
| 435 |
+
]
|
| 436 |
+
},
|
| 437 |
+
{
|
| 438 |
+
"cell_type": "markdown",
|
| 439 |
+
"metadata": {},
|
| 440 |
+
"source": [
|
| 441 |
+
"# 5 Export the results and submit"
|
| 442 |
+
]
|
| 443 |
+
},
|
| 444 |
+
{
|
| 445 |
+
"cell_type": "code",
|
| 446 |
+
"execution_count": 142,
|
| 447 |
+
"metadata": {},
|
| 448 |
+
"outputs": [],
|
| 449 |
+
"source": [
|
| 450 |
+
"result_final.to_csv('Indicator_Framework_Harmonizer_Definition_new_order.csv')"
|
| 451 |
+
]
|
| 452 |
+
}
|
| 453 |
+
],
|
| 454 |
+
"metadata": {
|
| 455 |
+
"kernelspec": {
|
| 456 |
+
"display_name": "seatableToKobo",
|
| 457 |
+
"language": "python",
|
| 458 |
+
"name": "python3"
|
| 459 |
+
},
|
| 460 |
+
"language_info": {
|
| 461 |
+
"codemirror_mode": {
|
| 462 |
+
"name": "ipython",
|
| 463 |
+
"version": 3
|
| 464 |
+
},
|
| 465 |
+
"file_extension": ".py",
|
| 466 |
+
"mimetype": "text/x-python",
|
| 467 |
+
"name": "python",
|
| 468 |
+
"nbconvert_exporter": "python",
|
| 469 |
+
"pygments_lexer": "ipython3",
|
| 470 |
+
"version": "3.10.13"
|
| 471 |
+
}
|
| 472 |
+
},
|
| 473 |
+
"nbformat": 4,
|
| 474 |
+
"nbformat_minor": 2
|
| 475 |
+
}
|