Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import numpy as np | |
| import numpy as np | |
| import pickle | |
| import pandas as pd | |
| from PRNN import PRNN | |
| from PRNN_utils import batch_calculate_grads, check_conditions, check_all_conditions, train_and_val, prepare_folds, process_CVresults, tags2sentence | |
| import nltk | |
| nltk.download('all') | |
| from nltk.tokenize import word_tokenize | |
def tokens_and_tags(sentence):
    """Tokenize *sentence* and give every token a coarse POS label.

    The sentence is split with ``word_tokenize`` and tagged with
    ``nltk.pos_tag``. Tags 'JJ', 'NN' and 'DT' are kept as they are; every
    other tag returned by the tagger is replaced by the catch-all 'OT'.

    Args:
        sentence: Raw input text.

    Returns:
        A ``(words, tags)`` pair of equal-length lists: the tokens in order
        and, for each token, one of 'JJ', 'NN', 'DT' or 'OT'.
    """
    kept_tags = {'JJ', 'NN', 'DT'}
    tagged = nltk.pos_tag(word_tokenize(sentence))

    # Every token is retained; only its tag is collapsed when it is not
    # one of the kept labels.
    words = [word for word, _ in tagged]
    tags = [tag if tag in kept_tags else 'OT' for _, tag in tagged]
    return words, tags
def create_pos_tags(tags=('NN', 'JJ', 'DT', 'OT')):
    """Translate coarse POS tag strings into their integer ids.

    Args:
        tags: Iterable of tag strings, each one of 'NN', 'DT', 'JJ', 'OT'.
            The default is an immutable tuple (not a list) so the default
            object can never be modified between calls.

    Returns:
        A new list with one integer per input tag, in input order, using
        the mapping NN=1, DT=2, JJ=3, OT=4.

    Raises:
        KeyError: If a tag is not one of the four known labels.
    """
    pos_dict = {'NN': 1, 'DT': 2, 'JJ': 3, 'OT': 4}
    return [pos_dict[tag] for tag in tags]
def predict_for_example(sentence, tags, model):
    """Run *model* on the POS tags of one sentence and return its output.

    The tag strings are converted to integer ids with ``create_pos_tags``,
    passed through ``tags2sentence`` (presumably building the model's input
    encoding; confirm in PRNN_utils), and the result is handed to
    ``model.predict_tags``.

    Args:
        sentence: Accepted for interface compatibility; not used here.
        tags: Coarse POS tag strings ('NN', 'DT', 'JJ', 'OT') for the tokens.
        model: Object exposing ``predict_tags``.

    Returns:
        Whatever ``model.predict_tags`` returns for the encoded input.
    """
    encoded_input = tags2sentence(create_pos_tags(tags))
    return model.predict_tags(encoded_input)
def get_noun_chunks(sentence, tags, preds):
    """Extract noun-chunk strings from tokens using predicted chunk labels.

    A label of 1 opens a chunk and each directly following label of 0
    extends it. A chunk spanning more than one token is always reported.
    A single-token chunk is reported only when its POS tag is 'NN'.
    Tokens labelled 0 before any 1 has been seen are ignored.

    Each chunk is reported exactly once: a chunk that starts on an 'NN'
    token and continues over further tokens yields only the full span,
    not the first token as a separate extra chunk.

    Args:
        sentence: Sequence of token strings.
        tags: Sequence of POS tag strings aligned with ``sentence``.
        preds: Sequence of chunk labels (1 = chunk start, 0 = continuation)
            aligned with ``sentence``.

    Returns:
        List of chunks in order of appearance, each chunk being its tokens
        joined by single spaces.
    """
    tokens = sentence
    pos_tags = tags
    chunk_tags = preds
    total = len(chunk_tags)

    noun_chunks = []
    i = 0
    while i < total:
        if chunk_tags[i] == 1:
            start = i
            # Absorb every continuation label that follows the chunk start.
            while i + 1 < total and chunk_tags[i + 1] == 0:
                i += 1
            # Decide only after the extent is known, so that a multi-token
            # chunk beginning with a noun is not emitted twice.
            if i > start or pos_tags[start] == 'NN':
                noun_chunks.append(" ".join(tokens[start:i + 1]))
        i += 1
    return noun_chunks
def _load_best_model(results_path):
    """Build a PRNN from a pickled cross-validation results file.

    Instantiates ``PRNN``, unpickles ``results_path``, passes the loaded
    dictionary to ``process_CVresults(..., summarize=False)`` and assigns
    the two returned values to ``model.params`` and ``model.w``.

    NOTE: ``pickle.load`` can execute arbitrary code while loading; only
    point this at result files that ship with the application.

    Args:
        results_path: Path of the pickle file holding the CV results.

    Returns:
        Tuple ``(model, results_dict, best_params, best_weights)``.
    """
    model = PRNN()  # Instantiate a model
    with open(results_path, 'rb') as f:
        results_dict = pickle.load(f)
    best_params, best_weights = process_CVresults(CVresults_dict=results_dict, summarize=False)
    model.params = best_params
    model.w = best_weights
    return model, results_dict, best_params, best_weights


# Models are loaded in the order 2, 4, 1, 3. Every module-level name the
# inline loading code used to define is still bound here.
model2, model_dict2, P_best2, W_best2 = _load_best_model('CVresults_con_data.pkl')
model4, model_dict4, P_best4, W_best4 = _load_best_model('CVresults_con_data_sigmoid.pkl')
model1, model_dict1, P_best1, W_best1 = _load_best_model('CVresults_data.pkl')
model3, model_dict3, P_best3, W_best3 = _load_best_model('CVresults_data_sigmoid.pkl')
def demo_(sentence):
    """Gradio callback: tag a sentence and report chunks from all four models.

    The sentence is tokenized and POS-tagged once, then each of the four
    module-level models predicts chunk labels for it. For every model a
    two-line report is built: the predicted labels, then the noun chunks
    derived from them by ``get_noun_chunks``.

    Args:
        sentence: Raw text entered by the user.

    Returns:
        A 5-tuple: the reports for model2, model4, model1 and model3 (in
        that order), followed by the list of coarse POS tags.
    """
    words, tags = tokens_and_tags(sentence)

    # Models are queried in the order 1, 3, 2, 4.
    predictions = {}
    for key, model in ((1, model1), (3, model3), (2, model2), (4, model4)):
        predictions[key] = predict_for_example(sentence=words, tags=tags, model=model)

    def report(preds):
        # One report: label line, newline, then the extracted chunks.
        return (
            "predicted labels:\t" + str(preds) + "\n"
            + "predicted Noun chunks \t"
            + str(get_noun_chunks(sentence=words, tags=tags, preds=preds))
        )

    return (
        report(predictions[2]),
        report(predictions[4]),
        report(predictions[1]),
        report(predictions[3]),
        tags,
    )
# Page title shown at the top of the Gradio interface.
title = "POS-Tagged Corpus Analysis: Training a Recurrent Perceptron for Noun Chunk Identification"

# One input textbox for the sentence and five output textboxes: one per
# value returned by demo_, in the same order.
demo = gr.Interface(
    fn=demo_,
    inputs=gr.Textbox(
        label="sentence for which you want noun chunks",
        lines=1,
        interactive=True,
        show_copy_button=True,
    ),
    outputs=[
        gr.Textbox(label=box_label, lines=box_lines, interactive=True, show_copy_button=True)
        for box_label, box_lines in (
            ("prediction on conditioned data with step activation function", 2),
            ("prediction on conditioned data with sigmoid activation function", 2),
            ("prediction on all data with step activation function", 2),
            ("prediction on all data with sigmoid activation function", 2),
            ("pos tag label given by nltk library", 1),
        )
    ],
    title=title,
)

# NOTE(review): share=True requests a public share link; confirm it is
# needed (and supported) in the environment where this app is hosted.
demo.launch(share=True)