## Package imports

import streamlit as st
from shapely.geometry import Point
from math import radians, cos, sin, asin, sqrt
import pandas as pd
import re
import json

from sentence_transformers import SentenceTransformer, util


# Directory prefix (a relative path) prepended to the data file names read by
# this module: "big_ideas_contest.tsv" and "iea.txt".
path = 'Climate_site/python_scripts/'

@st.cache_resource
def model_nlp():
    """Build and return the 'all-mpnet-base-v2' SentenceTransformer model.

    The function is decorated with ``st.cache_resource`` so that Streamlit
    can reuse the returned model object instead of rebuilding it.
    """
    return SentenceTransformer('all-mpnet-base-v2')

@st.cache_data
def load_data():
    """Read the contest table and return it as a dict keyed by row index.

    The tab-separated file "big_ideas_contest.tsv" is read from ``path`` with
    its first column used as the index. The result has the shape
    ``{index_value: {column_name: cell_value}}``.
    """
    table = pd.read_csv(
        path + "big_ideas_contest.tsv",
        delimiter="\t",
        index_col=0,
    )
    return table.to_dict('index')


#################### General Functions #############################

def clean_encoding(encoded_text):
    """Parse a stringified list of numbers such as ``"[0.1, -0.2]"``.

    Newlines inside the text are removed, the first and last characters
    (the surrounding brackets) are dropped, and the remaining comma-separated
    items are converted with ``float``.

    Args:
        encoded_text: the textual list, or ``None``.

    Returns:
        ``None`` when ``encoded_text`` is ``None``, otherwise a list of floats
        (empty for an empty list such as ``"[]"``).

    Raises:
        ValueError: if an item cannot be converted to ``float``.
    """
    if encoded_text is None:
        return None

    # Strip outer whitespace first so the slice really removes the brackets.
    body = encoded_text.replace("\n", "").strip()[1:-1]
    if not body.strip():
        return []

    # Split on "," only: float() ignores surrounding blanks, so both
    # "1, 2" and "1,2" are accepted.
    return [float(item) for item in body.split(",")]
    

def norm(vector):
    """Return the Euclidean (L2) length of ``vector``."""
    squares = [component * component for component in vector]
    return sqrt(sum(squares))

def cosine_similarity2(vec_a, vec_b):
    """Return the cosine similarity of two numeric sequences.

    The dot product is taken over the pairs produced by ``zip``, so extra
    trailing items of the longer sequence are ignored.

    Args:
        vec_a: first sequence of numbers.
        vec_b: second sequence of numbers.

    Returns:
        ``dot(vec_a, vec_b) / (|vec_a| * |vec_b|)``, or ``0.0`` when the
        product of the norms is zero (the similarity is undefined there and
        the division would otherwise raise ``ZeroDivisionError``).
    """
    denominator = norm(vec_a) * norm(vec_b)
    if denominator == 0:
        return 0.0
    dot = sum(a * b for a, b in zip(vec_a, vec_b))
    return dot / denominator

def print_extracted_text(name_file):
    """Print every line of ``iea.txt`` with surrounding whitespace stripped.

    Args:
        name_file: not used; the file name "iea.txt" under ``path`` is
            hard-coded. The parameter is kept so existing calls still work.
    """
    # The context manager closes the file even if printing fails.
    with open(path + 'iea.txt', "r", encoding='utf8') as file:
        for line in file:
            print(line.strip())

def details(name_file , display):
    """Collect the text that follows each "Details" / "Hide" marker in ``iea.txt``.

    The file is scanned line by line (each line whitespace-stripped) until a
    line equal to "Close explanation" or the end of the file. An entry index
    starts at -1 and is incremented on every header line, i.e. a line that
    starts with a digit and contains both ">" and the space+tab separator.

    Collection starts after a line whose last space+tab-separated field is
    "Details" or "Hide" and stops at the next non-empty line starting with
    "*". The collected lines, each followed by one space, are stored under
    the entry index current at that moment. Text still being collected when
    the scan ends is not stored.

    The file is opened as UTF-8, matching the other readers of ``iea.txt``
    in this module.

    Args:
        name_file: not used; the file name is hard-coded.
        display: when truthy, print the index and text of each stored entry.

    Returns:
        dict mapping entry index (int) to the collected text (str).
    """
    separator = " \t"  # space followed by a tab
    dic_details = {}
    count = -1
    collecting = False
    text = ""

    # The context manager guarantees the handle is released.
    with open(path + "iea.txt", "r", encoding='utf8') as file:
        for raw_line in file:
            line = raw_line.strip()
            if line == "Close explanation":
                break

            if line and line[0].isnumeric() and ">" in line and separator in line:
                count += 1

            # A "*..." line closes the block being collected.
            if collecting and line.startswith("*"):
                if display:
                    print(count)
                    print(text)
                    print(" ")
                dic_details[count] = text
                collecting = False

            if collecting:
                text = text + line + " "

            # Checked last so the marker line itself is never collected.
            if line.split(separator)[-1] in ("Details", "Hide"):
                collecting = True
                text = ""

    return dic_details


def key_initiatives(name_file , display ):
    """Collect the text of every "*Key initiatives:*" section in ``iea.txt``.

    The file is scanned line by line (each line whitespace-stripped) until a
    line equal to "Close explanation" or the end of the file. An entry index
    starts at -1 and is incremented on every header line, i.e. a line that
    starts with a digit and contains both ">" and the space+tab separator.

    Collection starts after a line equal to "*Key initiatives:*" and stops at
    the next header line, or at a line equal to "*Deployment targets:*" or
    "*Announced development targets:*". The collected lines, each followed by
    one space, are stored under the entry index current at that moment. Text
    still being collected when the scan ends is not stored.

    Args:
        name_file: not used; the file name is hard-coded.
        display: when truthy, print the index and text of each stored section.

    Returns:
        dict mapping entry index (int) to the collected text (str).
    """
    separator = " \t"  # space followed by a tab
    end_markers = ("*Deployment targets:*", "*Announced development targets:*")
    dic_key_initiatives = {}
    count = -1
    collecting = False
    text = ""

    # The context manager guarantees the handle is released.
    with open(path + 'iea.txt', "r", encoding='utf8') as file:
        for raw_line in file:
            line = raw_line.strip()
            if line == "Close explanation":
                break

            is_header = (
                bool(line)
                and line[0].isnumeric()
                and ">" in line
                and separator in line
            )
            if is_header:
                count += 1

            # NOTE(review): the index is advanced before this flush, so a
            # section closed by the next header line is stored under that
            # next entry's index - confirm this is intended.
            if collecting and (is_header or line in end_markers):
                if display:
                    print(count)
                    print(text)
                    print(" ")
                dic_key_initiatives[count] = text
                collecting = False

            if collecting:
                text = text + line + " "

            # Checked last so the marker line itself is never collected.
            if line == "*Key initiatives:*":
                collecting = True
                text = ""

    return dic_key_initiatives


def deployment_target(name_file , display):
    """Collect the deployment / development target sections of ``iea.txt``.

    The file is scanned line by line (each line whitespace-stripped) until a
    line equal to "Close explanation" or the end of the file. An entry index
    starts at -1 and is incremented on every header line, i.e. a line that
    starts with a digit and contains both ">" and the space+tab separator.

    Collection starts after a line equal to "*Deployment targets:*" or
    "*Announced development targets:*". It stops at the next header line, or
    at a line equal to "*Announced cost reduction targets:*" or
    "*Announced development targets:*" (the latter closes the running section
    and immediately opens a new one). The collected lines, each followed by
    one space, are stored under the entry index current at that moment, so a
    later section for the same index overwrites an earlier one. Text still
    being collected when the scan ends is not stored.

    Args:
        name_file: not used; the file name is hard-coded.
        display: when truthy, print the index and text of each stored section.

    Returns:
        dict mapping entry index (int) to the collected text (str).
    """
    separator = " \t"  # space followed by a tab
    start_markers = ("*Deployment targets:*", "*Announced development targets:*")
    end_markers = (
        "*Announced cost reduction targets:*",
        "*Announced development targets:*",
    )
    dic_target = {}
    count = -1
    collecting = False
    text = ""

    # The context manager guarantees the handle is released.
    with open(path + 'iea.txt', "r", encoding='utf8') as file:
        for raw_line in file:
            line = raw_line.strip()
            if line == "Close explanation":
                break

            is_header = (
                bool(line)
                and line[0].isnumeric()
                and ">" in line
                and separator in line
            )
            if is_header:
                count += 1

            # NOTE(review): the index is advanced before this flush, so a
            # section closed by the next header line is stored under that
            # next entry's index - confirm this is intended.
            if collecting and (is_header or line in end_markers):
                if display:
                    print(count)
                    print(text)
                    print(" ")
                dic_target[count] = text
                collecting = False

            if collecting:
                text = text + line + " "

            # Checked last so the marker line itself is never collected.
            if line in start_markers:
                collecting = True
                text = ""

    return dic_target


def cost_reduction_target(name_file , display):
    """Collect every "*Announced cost reduction targets:*" section of ``iea.txt``.

    The file is scanned line by line (each line whitespace-stripped) until a
    line equal to "Close explanation" or the end of the file. An entry index
    starts at -1 and is incremented on every header line, i.e. a line that
    starts with a digit and contains both ">" and the space+tab separator.

    Collection starts after a line equal to
    "*Announced cost reduction targets:*" and stops at the next header line.
    The collected lines, each followed by one space, are stored under the
    entry index current at that moment. Text still being collected when the
    scan ends is not stored.

    Args:
        name_file: not used; the file name is hard-coded.
        display: when truthy, print the index and text of each stored section.

    Returns:
        dict mapping entry index (int) to the collected text (str).
    """
    separator = " \t"  # space followed by a tab
    dic_cost = {}
    count = -1
    collecting = False
    text = ""

    # The context manager guarantees the handle is released.
    with open(path + 'iea.txt', "r", encoding='utf8') as file:
        for raw_line in file:
            line = raw_line.strip()
            if line == "Close explanation":
                break

            is_header = (
                bool(line)
                and line[0].isnumeric()
                and ">" in line
                and separator in line
            )
            if is_header:
                count += 1

            # NOTE(review): a header line is the only terminator and the
            # index has already been advanced for it, so every section is
            # stored under the index of the header that follows it - confirm
            # this is intended.
            if collecting and is_header:
                if display:
                    print(count)
                    print(text)
                    print(" ")
                dic_cost[count] = text
                collecting = False

            if collecting:
                text = text + line + " "

            # Checked last so the marker line itself is never collected.
            if line == "*Announced cost reduction targets:*":
                collecting = True
                text = ""

    return dic_cost


def key_words(name_file, display ):
    """Extract the ">"-separated key words of every header line in ``iea.txt``.

    The file is scanned line by line until a stripped line equals
    "Close explanation" or the end of the file. A header line starts with a
    digit and contains both ">" and the space+tab separator; each one
    increments an entry index that starts at -1.

    For a header line, the third space+tab-separated field is taken. If the
    raw line contains neither "Details" nor "Moderate" and that field has no
    tab, following raw lines are appended (joined with two spaces) until the
    text contains a tab, a following line starts with "Details" or "End-use",
    or the file ends. The text is then cleaned: newlines, "/" and "-" become
    spaces, everything from the first space+tab on is dropped, the text is
    split on ">", a trailing "(...)" part of the last piece is removed, and
    each piece has runs of spaces collapsed and is stripped. The first empty
    piece, if any, is removed.

    Args:
        name_file: not used; the file name is hard-coded.
        display: when truthy, print a "Technologies N :" banner and the
            pieces of every entry.

    Returns:
        list of ``[entry_index, pieces]`` pairs, one per header line.
    """
    separator = " \t"  # space followed by a tab

    # Read everything up front (look-ahead is needed) and release the handle.
    with open(path + 'iea.txt', "r", encoding='utf8') as file:
        lines = file.readlines()

    list_categories = []
    count = -1
    for index, raw_line in enumerate(lines):
        line = raw_line.strip()

        if line == "Close explanation":
            break

        if not (line and line[0].isnumeric() and ">" in line and separator in line):
            continue

        count += 1
        if display:
            print("Technologies", count + 1, ":")

        try:
            line = line.split(separator)[2]
        except IndexError:
            # Header with fewer than three fields: report it and stop parsing.
            print(line)
            break

        if "Details" not in raw_line and "Moderate" not in raw_line:
            # The field continues on the following lines.
            offset = 0
            while "\t" not in line:
                offset += 1
                if index + offset >= len(lines):
                    # Ran out of lines; stop instead of raising IndexError.
                    break
                following = lines[index + offset]
                if following[:7] in ("Details", "End-use"):
                    break
                line = line + "  " + following

        line = line.replace("\n", " ").replace("/", " ").replace("-", " ")
        line = line.split(separator)[0]
        line = line.replace("  ", " ")
        pieces = line.split(">")

        if "(" in pieces[-1]:
            pieces[-1] = pieces[-1].split("(")[0]

        # Collapse runs of spaces, then trim both ends of every piece.
        pieces = [re.sub(' +', ' ', piece).strip() for piece in pieces]

        if display:
            print(pieces)
            print(" ")

        # Only the first empty piece is dropped.
        if '' in pieces:
            pieces.remove('')

        list_categories.append([count, pieces])

    return list_categories


def technology(name_file, display ):
    """Extract the ">"-separated technology path of every header line in ``iea.txt``.

    The file is scanned line by line (each line whitespace-stripped) until a
    line equal to "Close explanation" or the end of the file. A header line
    starts with a digit and contains both ">" and the space+tab separator;
    each one increments an entry index that starts at -1.

    For a header line, the second space+tab-separated field is taken. "/" and
    "-" become spaces, runs of spaces are collapsed, the text is split on
    ">", a trailing "(...)" part of the last piece is removed, and each piece
    is stripped.

    Args:
        name_file: not used; the file name is hard-coded.
        display: when truthy, print a "Technologies N :" banner and the
            pieces of every entry.

    Returns:
        list of ``[entry_index, pieces]`` pairs, one per header line.
    """
    separator = " \t"  # space followed by a tab
    list_categories = []
    count = -1

    # The context manager guarantees the handle is released.
    with open(path + 'iea.txt', "r", encoding='utf8') as file:
        for raw_line in file:
            line = raw_line.strip()

            if line == "Close explanation":
                break

            if not (line and line[0].isnumeric() and ">" in line and separator in line):
                continue

            count += 1
            if display:
                print("Technologies", count + 1, ":")

            # The header test guarantees the separator is present, so the
            # split always yields at least two fields.
            line = line.split(separator)[1]

            line = line.replace("\n", " ").replace("/", " ").replace("-", " ")
            line = re.sub(' +', ' ', line.strip())
            line = line.split(separator)[0]
            pieces = line.split(">")

            if "(" in pieces[-1]:
                pieces[-1] = pieces[-1].split("(")[0]

            # Collapse runs of spaces, then trim both ends of every piece.
            pieces = [re.sub(' +', ' ', piece).strip() for piece in pieces]

            if display:
                print(pieces)
                print(" ")

            list_categories.append([count, pieces])

    return list_categories

          
#################### Contest Functions #############################


def extract_quantitative_data_technology(technologies, number_technology):
    """Return the details, deployment-target and cost-target texts of one entry.

    Each text is looked up by ``number_technology`` in the dict returned by
    ``details``, ``deployment_target`` and ``cost_reduction_target``
    respectively. A missing entry yields 'No information'; this now includes
    the details text, which previously raised ``UnboundLocalError``.

    Args:
        technologies: not used by this function.
        number_technology: entry index used as the lookup key.

    Returns:
        tuple ``(reference_text, cost_target_text, cost_text)`` of strings.
    """
    name_file = "iea"
    missing = 'No information'

    reference_text = details(name_file, False).get(number_technology, missing)
    cost_target_text = deployment_target(name_file, False).get(number_technology, missing)
    cost_text = cost_reduction_target(name_file, False).get(number_technology, missing)

    return reference_text, cost_target_text, cost_text


def big_ideas_encoding(number_technology, min_score=0.45):
    """Find contest projects whose description resembles an entry's details text.

    The details text of entry ``number_technology`` (from ``details``) is
    encoded with the model returned by ``model_nlp``. Every record from
    ``load_data`` with a non-missing 'encoded_description' is scored against
    it with ``cosine_similarity2``; records scoring above ``min_score`` are
    kept.

    Args:
        number_technology: entry index whose details text is the query.
        min_score: similarity a record must exceed to be kept (default 0.45,
            the previously hard-coded value).

    Returns:
        A DataFrame of the kept records sorted by descending "score", limited
        to the columns "Project_name", "Description", "Project_date",
        "University", "Field", "Team_members" and "score"; or the string
        "No related project found" when nothing matches or when the entry has
        no details text (the latter previously raised ``KeyError``).
    """
    dic_details = details('iea', False)
    if number_technology not in dic_details:
        return "No related project found"

    model = model_nlp()
    dic_big_ideas = load_data()

    IEA_encoding = model.encode(
        dic_details[number_technology],
        convert_to_tensor=False,
        show_progress_bar=False,
    ).tolist()

    matches = []
    for record in dic_big_ideas.values():
        encoded = record['encoded_description']
        if pd.isna(encoded):
            continue
        score = cosine_similarity2(IEA_encoding, clean_encoding(encoded))
        if score > min_score:
            # Copy the record so the loaded data is not modified in place.
            matches.append(dict(record, score=score))

    if not matches:
        return "No related project found"

    columns = [
        "Project_name",
        "Description",
        "Project_date",
        "University",
        "Field",
        "Team_members",
        "score",
    ]
    frame = pd.DataFrame(dict(enumerate(matches))).T
    return frame.sort_values("score", ascending=False)[columns]
    
    
################################### Extracted texts ###############################################################

#@title Which patents are related to the technology?


def finder():
    """Build the lookup structures derived from ``technology`` and ``key_words``.

    Returns:
        A 4-tuple ``(dic_technologies, dic_categories, list_categories_tech,
        list_technologies)``:

        * ``dic_technologies`` maps each ", "-joined ``technology`` label to
          the list of ``(key_words label, index)`` tuples of the entries
          carrying that label.
        * ``dic_categories`` maps each ``key_words`` index to three strings
          built from the last three comma-separated parts of its label (each
          part cut to its first three words, "CCUS" spelled out): all parts,
          all but the last, and all but the first.
        * ``list_categories_tech`` lists the distinct ``technology`` labels in
          first-seen order.
        * ``list_technologies`` lists ``(key_words label, index)`` for every
          ``key_words`` entry.
    """
    tech_rows = technology("iea", False)
    list_categories = key_words("iea", False)

    labels = [", ".join(entry[1]) for entry in list_categories]
    list_technologies = [(label, position) for position, label in enumerate(labels)]

    list_categories_tech = []
    dic_technologies = {}
    for position, row in enumerate(tech_rows):
        group = ", ".join(row[1])
        if group not in list_categories_tech:
            list_categories_tech.append(group)
            dic_technologies[group] = []
        dic_technologies[group].append((labels[position], position))

    def shorten(parts):
        # Keep at most the first three words of every part.
        return ", ".join(" ".join(part.split()[:3]) for part in parts)

    dic_categories = {}
    for label, position in list_technologies:
        tail = [
            part.replace("CCUS", "carbon capture storage")
            for part in label.split(",")[-3:]
        ]
        dic_categories[position] = [
            shorten(tail),
            shorten(tail[:-1]),
            shorten(tail[1:]),
        ]

    return dic_technologies, dic_categories, list_categories_tech, list_technologies