import tokenizer as text_rank_tokenizer
import ranker as text_rank_ranker
import numpy as np

def _read_text(path):
    """Return the full UTF-8 decoded content of ``path``, closing the file afterwards."""
    with open(path, 'r', encoding="utf-8") as handle:
        return handle.read()


# Language resources loaded once at import time. The paths are relative, so
# they are resolved against the current working directory of the process.
# ``stop_words`` and ``word_endings`` are kept as the raw file text;
# ``kriyapads`` and ``samyojaks`` are lists obtained by splitting on newlines.
stop_words = _read_text("./text_rank/stopwords.txt")
word_endings = _read_text("./text_rank/word_endings.txt")
kriyapads = _read_text("./text_rank/minimal_kriyapad.txt").split("\n")
samyojaks = _read_text("./text_rank/samyojak.txt").split("\n")
# Only the path is stored here; the file is handed to
# ``text_rank_tokenizer.get_valid_chars`` on every summarization call.
valid_chars = "./text_rank/valid_chars.json"

def get_summary_from_text(text, force_use_purnabiram_model=False):
    """Summarize ``text`` by ranking its sentences with the text-rank helpers.

    The text is split into sentences, duplicate sentences are dropped, the
    remaining words are filtered and tokenized, and the ranker helpers score
    each sentence. The ranker is then asked for ``ceil(0.33 * sentence_count)``
    sentences, which are joined into the returned summary.

    Args:
        text: The input text to summarize.
        force_use_purnabiram_model: When true, ``add_purnabiram`` is always
            applied to the text before sentence splitting. When false, it is
            applied only if the text contains fewer than one "।" per 100
            characters.

    Returns:
        * A fixed error-message string when no sentence could be extracted.
        * The ``sentences`` container itself when exactly one sentence remains
          after de-duplication.
        * Otherwise whatever ``text_rank_ranker.get_summarized_text`` returns
          for the selected sentences.
    """
    # Heuristic: the text counts as already punctuated only if it has at least
    # one purnabiram ("।") per 100 characters and the caller did not force the
    # purnabiram-insertion step.
    purnabiram_count = text.count("।")
    is_complete_sentence = (
        not force_use_purnabiram_model
        and purnabiram_count * 100 >= len(text)
    )

    valid_characters = text_rank_tokenizer.get_valid_chars(valid_chars)

    if not is_complete_sentence:
        text = text_rank_tokenizer.add_purnabiram(text, kriyapads, samyojaks)

    #
    # Split the text into an array of sentences.
    #
    sentences = text_rank_tokenizer.get_sentences_as_arr(text)

    # NOTE(review): the cleaned ``text`` is never read again below, because the
    # sentences were already extracted above. Kept as in the original; confirm
    # whether the cleaning was meant to run before sentence splitting.
    text = text_rank_tokenizer.remove_useless_characters(text, valid_characters)

    sentences = text_rank_tokenizer.remove_repeating_sentences(sentences)

    if len(sentences) == 0:
        return "It is not a valid text. Please try again with a valid text."
    if len(sentences) == 1:
        # NOTE(review): this returns the sentence container rather than a
        # joined string like the other branches; kept for backward
        # compatibility with existing callers.
        return sentences

    words_arr = text_rank_tokenizer.get_words_as_arr(sentences)
    #
    # Remove the stop words from the array.
    #
    words_arr = text_rank_tokenizer.remove_stop_words_and_filter_word_arr(
        words_arr, word_endings, stop_words
    )

    #
    # Remove empty sentences and lone-word sentences, and update sentences accordingly.
    #
    sentences, words_arr = text_rank_tokenizer.remove_empty_sentences(sentences, words_arr)
    #
    # Tokenize the words and sentences into numbers.
    #
    tokens, token_dict = text_rank_tokenizer.tokenize(words_arr)
    #
    # Create an association matrix.
    #
    association_matrix, counter_vector = text_rank_ranker.create_association_matrix(
        tokens, No_of_unique_chars=len(token_dict)
    )
    #
    # Calculate the influence of each word on the paragraph.
    #
    word_influence_vector = text_rank_ranker.calculate_word_ranks(
        association_matrix, counter_vector
    )
    #
    # Based on the word importance ranking, calculate the sentence importance ranking.
    #
    sentence_influence = text_rank_ranker.calculate_sentence_influence(
        tokens, word_influence_vector
    )

    #
    # Pick the n most influential sentences from the given text as the summary.
    #
    # np.ceil returns a float; a sentence count must be an integer, so convert
    # it before handing it to the ranker.
    summary_size = int(np.ceil(len(sentences) * 0.33))
    summary_sentences = text_rank_ranker.get_n_influencial_sentence(
        sentences, sentence_influence, n=summary_size
    )

    summarized_text = text_rank_ranker.get_summarized_text(summary_sentences)

    return summarized_text