Spaces:

EmmaScharfmannBerkeley
/

Synapse_project

Sleeping

App Files Files Community

Synapse_project / Climate_site /python_scripts /functions_companies.py

EmmaScharfmannBerkeley

Update Climate_site/python_scripts/functions_companies.py

d1be032 over 2 years ago

raw

history blame contribute delete

12 kB

	##packages code

	import streamlit as st
	from shapely.geometry import Point
	import pandas as pd
	import re
	import json


	# Relative directory prefix that is prepended to every data file opened in
	# this module (related_companies.json, preqin_companies_IEA.tsv, iea.txt).
	# Being relative, it is resolved against the process working directory.
	path = 'Climate_site/python_scripts/'

	@st.cache_data
	def load_dic():
	    """Load the related-companies mapping from ``related_companies.json``.

	    The file is read from ``path`` and the parsed JSON document is returned
	    unchanged. ``related_VC_deals`` indexes the result first by technology
	    number (as a string) and then by company id to obtain a score.

	    Returns:
	        The object produced by ``json.load`` for the file.
	    """
	    # The context manager closes the handle even if parsing fails; JSON
	    # files are UTF-8, so do not depend on the platform default encoding.
	    with open(path + "related_companies.json", "r", encoding="utf-8") as f:
	        return json.load(f)

	@st.cache_data
	def load_data():
	    """Read the tab-separated companies table ``preqin_companies_IEA.tsv``.

	    The first column of the file becomes the index and the
	    ``portfolio_company_id`` column is converted to strings.

	    Returns:
	        The resulting ``pandas.DataFrame``.
	    """
	    companies = pd.read_csv(
	        path + "preqin_companies_IEA.tsv",
	        delimiter="\t",
	        index_col=0,
	    )
	    return companies.astype({'portfolio_company_id': 'str'})

	# Both datasets are loaded once when the module is imported;
	# related_VC_deals reads these two module-level names directly.
	table_companies = load_data()
	dic_companies = load_dic()


	#################### General Functions #############################



	def print_extracted_text(name_file):
	    """Print every line of ``iea.txt`` with surrounding whitespace removed.

	    Args:
	        name_file: Unused. The file read is always ``path + 'iea.txt'``;
	            the parameter is kept so existing callers keep working.
	    """
	    # The context manager guarantees the handle is closed even if printing
	    # raises part-way through the file.
	    with open(path + 'iea.txt', "r", encoding='utf8') as file:
	        for line in file:
	            print(line.strip())


	# Input file parsed by the functions below: iea.txt

	def details(name_file, display):
	    """Collect the free-text "details" paragraph of each technology in ``iea.txt``.

	    The file is scanned line by line (each line stripped) until a line equal
	    to ``"Close explanation"`` or the end of the file:

	    * a non-empty line that starts with a digit and contains both ``">"``
	      and ``" "`` is treated as a technology header and advances the
	      zero-based technology counter;
	    * a line whose last space-separated word is ``"Details"`` or ``"Hide"``
	      starts a new paragraph;
	    * every following line is appended to the paragraph (followed by one
	      space) until a line starting with ``"*"`` is reached, at which point
	      the paragraph is stored under the current technology counter.

	    A paragraph that is never closed by a ``"*"`` line is not stored.

	    Args:
	        name_file: Unused. The file read is always ``path + "iea.txt"``.
	        display: When truthy, print the counter, the paragraph and a blank
	            separator each time a paragraph is stored.

	    Returns:
	        dict mapping technology counter (int) to paragraph text (str).
	    """
	    dic_details = {}
	    count = -1  # index of the most recently seen technology header
	    collecting = False
	    text = ""

	    # utf8 matches how every other reader in this module opens iea.txt; the
	    # context manager closes the handle on every exit path.
	    with open(path + "iea.txt", "r", encoding='utf8') as file:
	        for raw_line in file:
	            line = raw_line.strip()
	            if line == "Close explanation":
	                break

	            if line != "" and line[0].isnumeric() and ">" in line and " " in line:
	                count += 1

	            # A "*" line terminates the paragraph that is being collected.
	            if collecting and line.startswith("*"):
	                if display:
	                    print(count)
	                    print(text)
	                    print(" ")
	                dic_details[count] = text
	                collecting = False

	            if collecting:
	                text = text + line + " "

	            # Checked last so the marker line itself is not part of the text.
	            if line.split(" ")[-1] in ("Details", "Hide"):
	                collecting = True
	                text = ""

	    return dic_details







	def key_initiatives(name_file, display):
	    """Collect the "Key initiatives:" section of each technology in ``iea.txt``.

	    The file is scanned line by line (each line stripped) until a line equal
	    to ``"Close explanation"`` or the end of the file. A non-empty line that
	    starts with a digit and contains both ``">"`` and ``" "`` is a
	    technology header and advances the zero-based technology counter.

	    A section starts after a line equal to ``"Key initiatives:"`` and ends at
	    the next technology header, at ``"Deployment targets:"`` or at
	    ``"Announced development targets:"``. Every line in between is appended
	    to the section text followed by one space.

	    Compared with the previous implementation, a section that runs up to the
	    next technology header is stored under the technology it belongs to
	    (it used to be stored under the following one, because the counter was
	    advanced first), and a section still open when scanning stops is stored
	    instead of being dropped.

	    Args:
	        name_file: Unused. The file read is always ``path + 'iea.txt'``.
	        display: When truthy, print the counter, the text and a blank
	            separator each time a section is stored.

	    Returns:
	        dict mapping technology counter (int) to section text (str).
	    """
	    dic_key_initiatives = {}
	    count = -1  # index of the most recently seen technology header
	    collecting = False
	    text = ""

	    def flush():
	        # Store the section collected so far under the current technology.
	        if display:
	            print(count)
	            print(text)
	            print(" ")
	        dic_key_initiatives[count] = text

	    with open(path + 'iea.txt', "r", encoding='utf8') as file:
	        for raw_line in file:
	            line = raw_line.strip()
	            if line == "Close explanation":
	                break

	            is_header = (
	                line != "" and line[0].isnumeric() and ">" in line and " " in line
	            )
	            ends_section = is_header or line in (
	                "Deployment targets:",
	                "Announced development targets:",
	            )

	            # Flush BEFORE advancing the counter so the text is filed under
	            # the technology whose header preceded it.
	            if collecting and ends_section:
	                flush()
	                collecting = False
	            if is_header:
	                count += 1

	            if collecting:
	                text = text + line + " "

	            # Checked last so the marker line itself is not part of the text.
	            if line == "Key initiatives:":
	                collecting = True
	                text = ""

	    if collecting:
	        flush()
	    return dic_key_initiatives




	def deployment_target(name_file, display):
	    """Collect the deployment / development targets of each technology in ``iea.txt``.

	    The file is scanned line by line (each line stripped) until a line equal
	    to ``"Close explanation"`` or the end of the file. A non-empty line that
	    starts with a digit and contains both ``">"`` and ``" "`` is a
	    technology header and advances the zero-based technology counter.

	    A section starts after a line equal to ``"Deployment targets:"`` or
	    ``"Announced development targets:"`` and ends at the next technology
	    header, at ``"Announced cost reduction targets:"`` or at
	    ``"Announced development targets:"`` (which immediately starts a new
	    section). Every line in between is appended to the section text followed
	    by one space. When a technology has several sections, the last one
	    stored wins.

	    Compared with the previous implementation, a section that runs up to the
	    next technology header is stored under the technology it belongs to
	    (it used to be stored under the following one, because the counter was
	    advanced first), and a section still open when scanning stops is stored
	    instead of being dropped.

	    Args:
	        name_file: Unused. The file read is always ``path + 'iea.txt'``.
	        display: When truthy, print the counter, the text and a blank
	            separator each time a section is stored.

	    Returns:
	        dict mapping technology counter (int) to section text (str).
	    """
	    dic_target = {}
	    count = -1  # index of the most recently seen technology header
	    collecting = False
	    text = ""

	    def flush():
	        # Store the section collected so far under the current technology.
	        if display:
	            print(count)
	            print(text)
	            print(" ")
	        dic_target[count] = text

	    with open(path + 'iea.txt', "r", encoding='utf8') as file:
	        for raw_line in file:
	            line = raw_line.strip()
	            if line == "Close explanation":
	                break

	            is_header = (
	                line != "" and line[0].isnumeric() and ">" in line and " " in line
	            )
	            ends_section = is_header or line in (
	                "Announced cost reduction targets:",
	                "Announced development targets:",
	            )

	            # Flush BEFORE advancing the counter so the text is filed under
	            # the technology whose header preceded it.
	            if collecting and ends_section:
	                flush()
	                collecting = False
	            if is_header:
	                count += 1

	            if collecting:
	                text = text + line + " "

	            # Checked last so the marker line itself is not part of the text.
	            if line in ("Deployment targets:", "Announced development targets:"):
	                collecting = True
	                text = ""

	    if collecting:
	        flush()
	    return dic_target




	def cost_reduction_target(name_file, display):
	    """Collect the "Announced cost reduction targets:" section of each technology.

	    ``iea.txt`` is scanned line by line (each line stripped) until a line
	    equal to ``"Close explanation"`` or the end of the file. A non-empty
	    line that starts with a digit and contains both ``">"`` and ``" "`` is a
	    technology header and advances the zero-based technology counter.

	    A section starts after a line equal to
	    ``"Announced cost reduction targets:"`` and ends at the next technology
	    header. Every line in between is appended to the section text followed
	    by one space.

	    Compared with the previous implementation, the section is stored under
	    the technology it belongs to. Because the next header is the only
	    terminator and the counter used to be advanced before storing, every
	    section was previously filed under the *following* technology, and the
	    section of the last technology was never stored at all; both are fixed.

	    Args:
	        name_file: Unused. The file read is always ``path + 'iea.txt'``.
	        display: When truthy, print the counter, the text and a blank
	            separator each time a section is stored.

	    Returns:
	        dict mapping technology counter (int) to section text (str).
	    """
	    dic_cost = {}
	    count = -1  # index of the most recently seen technology header
	    collecting = False
	    text = ""

	    def flush():
	        # Store the section collected so far under the current technology.
	        if display:
	            print(count)
	            print(text)
	            print(" ")
	        dic_cost[count] = text

	    with open(path + 'iea.txt', "r", encoding='utf8') as file:
	        for raw_line in file:
	            line = raw_line.strip()
	            if line == "Close explanation":
	                break

	            is_header = (
	                line != "" and line[0].isnumeric() and ">" in line and " " in line
	            )

	            # Flush BEFORE advancing the counter so the text is filed under
	            # the technology whose header preceded it.
	            if collecting and is_header:
	                flush()
	                collecting = False
	            if is_header:
	                count += 1

	            if collecting:
	                text = text + line + " "

	            # Checked last so the marker line itself is not part of the text.
	            if line == "Announced cost reduction targets:":
	                collecting = True
	                text = ""

	    if collecting:
	        flush()
	    return dic_cost



	# Build, for every technology header line of iea.txt, a list of keyword
	# strings obtained by splitting part of that line on ">".
	#
	# Returns a list of [count, parts] pairs, where count is the running
	# header counter (starting at 0) and parts is the list of cleaned strings.
	#
	# NOTE(review): the indentation of this function is not visible in this
	# view, so the comments below describe statement order only; confirm the
	# nesting against the original file before restructuring.
	# NOTE(review): name_file is never used; the file is always path + 'iea.txt'.
	def key_words(name_file, display ):

	# NOTE(review): this handle is never closed; a `with` block would fix that.
	file = open(path + 'iea.txt', "r", encoding='utf8')

	lines = file.readlines()

	list_categories = []
	count = -1
	for index, line in enumerate(lines):

	line = line.strip()

	# Stop scanning at this marker line.
	if line == "Close explanation":
	break

	# A header is a non-empty line starting with a digit that contains both
	# ">" and " "; each one advances the counter.
	if line != "" and (line[0].isnumeric() and ">" in line and " " in line) :
	count += 1

	if display == True:
	print("Technologies" , count+1 , ":")

	if line != "":

	if line[0].isnumeric() and ">" in line and " " in line:
	i = 0
	# Keep only the third " "-separated field of the header line. If the line
	# has fewer fields, it is printed and the scan stops.
	# NOTE(review): the bare except also hides unrelated errors.
	try:
	line = line.split(" ")[2]
	except:
	print(line)
	break

	# Look-ahead: unless the raw header line mentions "Details" or "Moderate",
	# keep appending the following raw lines until `line` contains " " or a
	# following line starts with "Details" / "End-use".
	# NOTE(review): lines[index + i] can raise IndexError near the end of the file.
	if "Details" not in lines[index] and "Moderate" not in lines[index]:

	while " " not in line:
	i += 1
	if "Details"==lines[index + i][:7] or "End-use"==lines[index + i][:7]:
	break
	else:
	line = line + " " + lines[index + i]

	#if " Production" in line:
	#line = line.replace(" Production" , "")

	# Turn newlines, "/" and "-" into spaces, then keep the text before the
	# first " ".
	line = line.replace("\n" , " ")
	line = line.replace("/" , " ")
	line = line.replace("-" , " ")
	line = line.split(" ")[0]

	if " " in line:
	line = line.replace(" ", " ")
	# From here on `line` is a list of the ">"-separated parts.
	line = line.split(">")


	# Drop anything from the first "(" onwards in the last part.
	if "(" in line[-1]:
	line[-1] = line[-1].split("(")[0]


	for i in range(len(line)):

	# remove multiple spaces
	line[i] = re.sub(' +', ' ', line[i])
	# remove trailing spaces
	line[i] = line[i].strip()



	if display == True:
	print(line)
	print(" ")

	# list.remove drops only the first empty string, if any.
	if '' in line:
	line.remove('')

	list_categories.append([count , line])

	return list_categories



	def technology(name_file, display):
	    """Extract the category path of every technology header in ``iea.txt``.

	    The file is scanned line by line (each line stripped) until a line equal
	    to ``"Close explanation"`` or the end of the file. A non-empty line that
	    starts with a digit and contains both ``">"`` and ``" "`` is a
	    technology header; each header advances a zero-based counter and
	    produces one entry in the result.

	    For a header, the second ``" "``-separated field is taken, newlines,
	    ``"/"`` and ``"-"`` are replaced by spaces, the text before the first
	    space is kept and split on ``">"``. Anything from the first ``"("``
	    onwards is removed from the last part, and every part has runs of
	    spaces collapsed and surrounding whitespace stripped.

	    Args:
	        name_file: Unused. The file read is always ``path + 'iea.txt'``.
	        display: When truthy, print a ``Technologies <n> :`` heading, the
	            extracted parts and a blank separator for every header.

	    Returns:
	        list of ``[counter, parts]`` pairs, one per technology header, where
	        ``parts`` is a list of strings.
	    """
	    list_categories = []
	    count = -1

	    # The context manager closes the handle on every exit path.
	    with open(path + 'iea.txt', "r", encoding='utf8') as file:
	        for raw_line in file:
	            line = raw_line.strip()
	            if line == "Close explanation":
	                break

	            is_header = (
	                line != "" and line[0].isnumeric() and ">" in line and " " in line
	            )
	            if not is_header:
	                continue

	            count += 1
	            if display:
	                print("Technologies", count + 1, ":")

	            try:
	                field = line.split(" ")[1]
	            except IndexError:
	                # Same best-effort reaction as before (report the line and
	                # stop), but without swallowing unrelated exceptions.
	                print(line)
	                break

	            for separator in ("\n", "/", "-"):
	                field = field.replace(separator, " ")
	            field = re.sub(' +', ' ', field.strip())
	            parts = field.split(" ")[0].split(">")

	            if "(" in parts[-1]:
	                parts[-1] = parts[-1].split("(")[0]

	            # Collapse repeated spaces and trim each part.
	            parts = [re.sub(' +', ' ', part).strip() for part in parts]

	            if display:
	                print(parts)
	                print(" ")

	            list_categories.append([count, parts])

	    return list_categories


	#################### Companies Functions #############################


	def extract_quantitative_data_technology(technologies, number_technology):
	    """Return the extracted texts available for one technology.

	    The three texts are looked up by ``number_technology`` in the results of
	    ``details``, ``deployment_target`` and ``cost_reduction_target``. Any
	    text that is missing is replaced by ``'No information'``; previously a
	    technology without a details entry raised ``UnboundLocalError`` because
	    the reference text had no default.

	    Args:
	        technologies: Unused; kept so existing callers keep working.
	        number_technology: Key used to look the technology up in the
	            extracted dictionaries (they are keyed by an integer counter).

	    Returns:
	        Tuple ``(reference_text, cost_target_text, cost_text)``.
	    """
	    name_file = "iea"
	    missing = 'No information'

	    cost_target_text = deployment_target(name_file, False).get(number_technology, missing)
	    cost_text = cost_reduction_target(name_file, False).get(number_technology, missing)
	    reference_text = details(name_file, False).get(number_technology, missing)

	    return reference_text, cost_target_text, cost_text


	def related_VC_deals(category, number_technology, size):
	    """Return the highest-scoring companies related to one technology.

	    Companies whose ``portfolio_company_id`` appears in
	    ``dic_companies[str(number_technology)]`` are selected from
	    ``table_companies``, given a ``score`` column taken from that mapping,
	    sorted by descending score and truncated to ``size`` rows. Commas are
	    removed from ``year_established`` and the column is converted to int
	    where possible.

	    The score column is assigned in one step, so it exists even when no
	    company matches; previously an empty match raised ``KeyError: 'score'``
	    when sorting.

	    Args:
	        category: Unused; kept so existing callers keep working.
	        number_technology: Technology number; converted to ``str`` before
	            being used as a key of ``dic_companies``.
	        size: Maximum number of rows to return.

	    Returns:
	        ``pandas.DataFrame`` indexed by ``portfolio_company_id`` with the
	        descriptive company columns and ``score``.

	    Raises:
	        KeyError: If the technology number is not a key of ``dic_companies``.
	    """
	    scores = dic_companies[str(number_technology)]

	    is_related = table_companies["portfolio_company_id"].isin(list(scores))
	    table = table_companies[is_related].set_index("portfolio_company_id").copy()

	    # One score per row, in index order; yields an empty column (rather than
	    # no column at all) when nothing matched.
	    table["score"] = [scores[str(company_id)] for company_id in table.index]

	    table = table.sort_values("score", ascending=False).head(size)
	    table = table[['portfolio_company_name', 'year_established', 'portfolio_company_website', 'firm_about',
	                   'portfolio_company_country', 'portfolio_company_state',
	                   'firm_othernames', 'industry_classification',
	                   'primary_industry', 'sub_industries', 'score']]

	    table["year_established"] = (
	        table["year_established"].replace(",", "", regex=True).astype(int, errors='ignore')
	    )

	    return table




	################################### Extracted texts ###############################################################

	#@title Which patents are related to the technology?


	def finder():
	    """Build the lookup structures that relate technologies to keywords.

	    Returns:
	        Tuple ``(dic_technologies, dic_categories, list_categories_tech,
	        list_technologies)`` where

	        * ``list_technologies`` holds one ``(joined keywords, position)``
	          pair per entry returned by ``key_words``;
	        * ``list_categories_tech`` holds the distinct joined names returned
	          by ``technology``, in order of first appearance;
	        * ``dic_technologies`` maps each of those names to the
	          ``(joined keywords, position)`` pairs found at the same positions;
	        * ``dic_categories`` maps each position to three query strings built
	          from the last three comma-separated keywords (all of them, all but
	          the last, all but the first), each keyword cut to its first three
	          words and with "CCUS" spelled out.
	    """
	    tech_rows = technology("iea", False)
	    keyword_rows = key_words("iea", False)

	    list_technologies = [
	        (", ".join(row[1]), position) for position, row in enumerate(keyword_rows)
	    ]

	    list_categories_tech = []
	    dic_technologies = {}
	    for position, row in enumerate(tech_rows):
	        label = ", ".join(row[1])
	        if label not in list_categories_tech:
	            list_categories_tech.append(label)
	            dic_technologies[label] = []
	        dic_technologies[label].append(
	            (", ".join(keyword_rows[position][1]), position)
	        )

	    def shorten(words):
	        # Keep at most the first three whitespace-separated tokens of each word.
	        return ", ".join(" ".join(word.split()[:3]) for word in words)

	    dic_categories = {}
	    for joined_keywords, position in list_technologies:
	        last_words = [
	            word.replace("CCUS", "carbon capture storage")
	            for word in joined_keywords.split(",")[-3:]
	        ]
	        dic_categories[position] = [
	            shorten(last_words),
	            shorten(last_words[:-1]),
	            shorten(last_words[1:]),
	        ]

	    return dic_technologies, dic_categories, list_categories_tech, list_technologies