EmmaScharfmannBerkeley committed on
Commit
7f69269
·
1 Parent(s): b5749a8

Update Climate_site/python_scripts/companies_function_own_details.py

Browse files
Climate_site/python_scripts/companies_function_own_details.py CHANGED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ##packages code
2
+
3
import json
import re
from math import radians, cos, sin, asin, sqrt

import numpy as np
import pandas as pd
import streamlit as st
from sentence_transformers import SentenceTransformer
10
+
11
# Directory prefix of the data files read by the loaders below. It is a
# relative path, so it is resolved against the process working directory.
path = 'Climate_site/python_scripts/'


from sentence_transformers import SentenceTransformer, util


@st.cache_resource
def model_nlp():
    """Return the sentence-embedding model used to encode descriptions.

    The function is wrapped in ``st.cache_resource`` so that Streamlit
    builds the model once and hands back the same instance on later calls
    instead of re-loading the weights.

    Returns:
        A ``SentenceTransformer`` built from ``'all-MiniLM-L6-v2'``.
    """
    return SentenceTransformer('all-MiniLM-L6-v2')


# The module-level name ``model`` is kept so that any code importing it keeps
# working, but it now points at the cached instance rather than loading a
# second, uncached copy of the same model.
model = model_nlp()
23
+
24
+
25
@st.cache_data
def load_dic():
    """Load the pre-computed company embeddings from their JSON file.

    The file ``preqin_venturedealsdetails_encoded.json`` is looked up under
    the module-level ``path`` prefix. The result is cached by
    ``st.cache_data``.

    Returns:
        The deserialized JSON document. ``get_similar_company`` indexes it
        by field name and expects each entry to map a company id to an
        embedding vector.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # The context manager closes the handle even if parsing fails; the
    # encoding is explicit so decoding does not depend on the host locale.
    with open(path + "preqin_venturedealsdetails_encoded.json", "r",
              encoding="utf-8") as f:
        return json.load(f)
30
+
31
@st.cache_data
def load_data():
    """Read the tab-separated company table into a DataFrame.

    The file ``preqin_companies_IEA.tsv`` is looked up under the
    module-level ``path`` prefix and its first column becomes the index.
    The result is cached by ``st.cache_data``.

    Returns:
        The company table with ``portfolio_company_id`` cast to strings.
    """
    companies = pd.read_csv(
        path + "preqin_companies_IEA.tsv",
        sep="\t",
        index_col=0,
    )
    # Ids are stored as strings because get_similar_company joins this
    # column against string company ids.
    return companies.astype({'portfolio_company_id': 'str'})
38
+
39
+
40
+
41
+
42
# Module-level data shared by the functions below: the company table and the
# embedding dictionary. Both come from the cached loaders; the table is
# loaded first, then the dictionary.
table_companies, dic_companies = load_data(), load_dic()
44
+
45
+
46
+
47
+
48
+
49
def norm(vector):
    """Return the Euclidean (L2) norm of ``vector``.

    Args:
        vector: Iterable of numbers.

    Returns:
        The square root of the sum of the squared components; ``0.0`` for
        an empty iterable.
    """
    squared_components = (component * component for component in vector)
    return sqrt(sum(squared_components))
51
+
52
def cosine_similarity2(vec_a, vec_b):
    """Return the cosine similarity between two vectors.

    Args:
        vec_a: Sequence of numbers. It is iterated twice (once for the norm,
            once for the dot product), so it must be re-iterable.
        vec_b: Sequence of numbers, paired element-wise with ``vec_a``.
            ``zip`` stops at the shorter of the two.

    Returns:
        ``dot(vec_a, vec_b) / (|vec_a| * |vec_b|)``. When the product of
        the norms is zero the similarity is undefined; ``0.0`` is returned
        in that case instead of raising ``ZeroDivisionError``.
    """
    denominator = norm(vec_a) * norm(vec_b)
    # Guard on the product rather than on each norm so that an underflow to
    # zero is caught as well as an all-zero vector.
    if denominator == 0:
        return 0.0
    dot = sum(a * b for a, b in zip(vec_a, vec_b))
    return dot / denominator
57
+
58
+
59
+
60
def get_similar_company(field, description, size):
    """Rank the companies of one field by similarity to a text description.

    Args:
        field: Key into ``dic_companies`` selecting the mapping of company
            id to stored embedding vector.
        description: Text to encode with the model from ``model_nlp()``.
        size: Maximum number of top-scoring companies to keep.

    Returns:
        A DataFrame holding the ``size`` highest-scoring companies in
        descending ``score`` order, left-joined with ``table_companies`` on
        ``portfolio_company_id``.

    Raises:
        KeyError: If ``field`` is not a key of ``dic_companies``.
    """
    companies = dic_companies[field]
    encoded_description = model_nlp().encode(description)

    # One matrix-vector product scores every stored embedding against the
    # query embedding.
    # NOTE(review): a raw dot product equals cosine similarity only if both
    # the stored and the query embeddings are unit-normalized -- confirm.
    embeddings = np.array(list(companies.values()))
    scores = np.dot(embeddings, np.asarray(encoded_description))

    # Index by company id (as a string) so the ranking can be joined with
    # the string-typed portfolio_company_id column.
    ranking = pd.DataFrame(
        {"score": scores},
        index=[str(company_id) for company_id in companies],
    )
    top = ranking.sort_values("score", ascending=False).head(size)

    return top.merge(
        table_companies,
        left_index=True,
        right_on="portfolio_company_id",
        how="left",
    )
76
+
77
+