File size: 1,744 Bytes
7f69269 5f11732 7f69269 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
## Package imports
import streamlit as st
import pandas as pd
import re
import json
from math import radians, cos, sin, asin, sqrt
import numpy as np
from sentence_transformers import SentenceTransformer
# Module-level SentenceTransformer instance, constructed when this module is imported.
# NOTE(review): get_similar_company() obtains its model through model_nlp()
# instead, so this eager load looks redundant -- confirm nothing else reads
# `model` before removing it.
model = SentenceTransformer('all-MiniLM-L6-v2')
# Directory prefix prepended to the data file names in load_dic() and load_data().
path = 'Climate_site/python_scripts/'
from sentence_transformers import SentenceTransformer, util
@st.cache_resource
def model_nlp():
    """Build and return the 'all-MiniLM-L6-v2' SentenceTransformer model.

    The function is wrapped in ``st.cache_resource`` so that Streamlit can
    hand back the same model object instead of constructing it on every call.
    """
    return SentenceTransformer('all-MiniLM-L6-v2')
@st.cache_data
def load_dic():
    """Load the pre-encoded company dictionary from its JSON file.

    Reads ``preqin_venturedealsdetails_encoded.json`` from the directory
    given by the module-level ``path`` and returns the parsed JSON object.
    The function is wrapped in ``st.cache_data``.

    Returns:
        The object decoded by ``json.load``. get_similar_company() indexes it
        by field name, so it is presumably a dict keyed by field -- verify
        against the data file.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    # Use a context manager so the file handle is closed even if parsing
    # fails; the previous version left the handle open.
    with open(path + "preqin_venturedealsdetails_encoded.json", "r") as f:
        dic_companies = json.load(f)
    return dic_companies
@st.cache_data
def load_data():
    """Read the companies table from its tab-separated file.

    The file ``preqin_companies_IEA.tsv`` is looked up under the module-level
    ``path``; its first column becomes the index, and the
    ``portfolio_company_id`` column is cast to string.

    Returns:
        pandas.DataFrame: the loaded table.
    """
    tsv_location = path + "preqin_companies_IEA.tsv"
    companies = pd.read_csv(tsv_location, delimiter="\t", index_col=0)
    # Cast ids to str; get_similar_company() merges on this column using
    # string keys.
    return companies.astype({'portfolio_company_id': 'str'})
# Load both data sources at import time; get_similar_company() reads these
# module-level names.
table_companies = load_data()  # companies table (DataFrame)
dic_companies = load_dic()  # parsed JSON, indexed by field in get_similar_company()
def norm(vector):
    """Return the Euclidean (L2) length of *vector*.

    Args:
        vector: an iterable of numbers.

    Returns:
        The square root of the sum of squared components (0.0 for an empty
        iterable).
    """
    squared_total = sum(component * component for component in vector)
    return sqrt(squared_total)
def cosine_similarity2(vec_a, vec_b):
    """Return the cosine similarity between two vectors.

    Args:
        vec_a: a re-iterable sequence of numbers (it is traversed twice).
        vec_b: a re-iterable sequence of numbers (it is traversed twice).

    Returns:
        The dot product of the two vectors divided by the product of their
        Euclidean norms. If either vector has zero magnitude the similarity
        is undefined and 0.0 is returned.

    Note:
        The dot product pairs components with ``zip``, so if the vectors
        differ in length the extra components of the longer one are ignored
        in the dot product (but still count toward its norm).
    """
    denominator = norm(vec_a) * norm(vec_b)
    if denominator == 0:
        # A zero-length vector has no direction; return 0.0 rather than
        # dividing by zero.
        return 0.0
    dot = sum(a * b for a, b in zip(vec_a, vec_b))
    return dot / denominator
def get_similar_company(field, description, size):
    """Rank the companies stored under *field* against a free-text description.

    The description is encoded with the model from model_nlp(), and each
    stored vector in ``dic_companies[field]`` is scored by its dot product
    with that encoding. The ``size`` highest-scoring entries are joined with
    ``table_companies``.

    Args:
        field: key into ``dic_companies``; presumably the values are mappings
            of company id -> embedding vector (verify against the JSON file).
        description: text passed to the model's ``encode`` method.
        size: number of top-scoring rows to keep.

    Returns:
        pandas.DataFrame: the top rows (with a ``score`` column) left-merged
        with ``table_companies`` on ``portfolio_company_id``.

    Raises:
        KeyError: if *field* is not present in ``dic_companies``.
    """
    encoder = model_nlp()
    field_vectors = dic_companies[field]
    query_vector = encoder.encode(description)

    # One row per stored company vector; the dot product gives one score each.
    embedding_matrix = np.array(list(field_vectors.values()))
    similarity = np.dot(embedding_matrix, np.array(query_vector))

    score_by_company = {}
    for company_id, value in zip(field_vectors.keys(), similarity):
        score_by_company[str(company_id)] = {"score": value}

    # Transpose so that company ids form the index and "score" is a column.
    ranked = pd.DataFrame(score_by_company).T
    ranked = ranked.sort_values("score", ascending=False)
    top_matches = ranked.head(size)

    return top_matches.merge(
        table_companies,
        left_index=True,
        right_on="portfolio_company_id",
        how="left",
    )
|