Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from PIL import Image | |
| import pytesseract | |
| import torch | |
| import numpy as np | |
| import nltk | |
| nltk.download('stopwords') | |
| nltk.download('punkt') | |
| from nltk.corpus import stopwords | |
| from nltk.cluster.util import cosine_distance | |
| import networkx as nx | |
| from transformers import pipeline | |
# Prefer the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Abstractive summarization model, created once at import time and reused by
# every request. `device` is handed to the pipeline so the selection above
# actually takes effect (previously it was computed but never used).
summarizer = pipeline(
    "summarization",
    model="facebook/bart-large-cnn",
    device=device,
)
def read(filepath):
    """Run OCR on the image stored at ``filepath`` and return the text found.

    The image is opened in a ``with`` block so its file handle is released as
    soon as the OCR call returns instead of lingering until garbage collection.
    """
    with Image.open(filepath) as image:
        return pytesseract.image_to_string(image)
def clean_text(text):
    """Split ``text`` into sentences and each sentence into word tokens.

    Sentences are delimited by ``"."``. Commas and apostrophes are padded with
    spaces first, so each of them becomes a token of its own.

    Returns a list of sentences, each a non-empty list of string tokens.
    Fragments that contain no tokens (for example the whitespace left after
    the final period) are dropped, because callers index the first word of
    every sentence.
    """
    sentences = []
    for fragment in text.split("."):
        padded = fragment.replace(",", " , ").replace("'", " ' ")
        # split() with no argument splits on any whitespace run (spaces, tabs,
        # form feeds, ...) and never yields empty strings.
        words = padded.split()
        if words:
            sentences.append(words)
    return sentences
def sentence_similarity(sent1, sent2, stopwords):
    """Return the cosine similarity of two tokenised sentences.

    Each sentence is turned into a word-count vector over the combined,
    lower-cased vocabulary of both sentences; words found in ``stopwords`` are
    not counted.

    Args:
        sent1: Sequence of word tokens of the first sentence.
        sent2: Sequence of word tokens of the second sentence.
        stopwords: Iterable of lower-case words to ignore, or ``None`` for
            no filtering. (The parameter name shadows the module-level
            ``stopwords`` import inside this function only.)

    Returns:
        The cosine of the angle between the two count vectors, or ``0`` when
        either vector is all zeros (e.g. a sentence made only of stop words),
        where the cosine is undefined.
    """
    ignored = set(stopwords) if stopwords is not None else set()

    words1 = [w.lower() for w in sent1]
    words2 = [w.lower() for w in sent2]

    # Map every distinct word to a vector slot once, instead of searching a
    # list for each occurrence.
    slot = {word: i for i, word in enumerate(set(words1 + words2))}
    vector1 = np.zeros(len(slot))
    vector2 = np.zeros(len(slot))

    for w in words1:
        if w not in ignored:
            vector1[slot[w]] += 1
    for w in words2:
        if w not in ignored:
            vector2[slot[w]] += 1

    norm_product = np.linalg.norm(vector1) * np.linalg.norm(vector2)
    if norm_product == 0:
        # Avoid the 0/0 division (NaN plus a runtime warning) for empty vectors.
        return 0
    return float(np.dot(vector1, vector2) / norm_product)
def build_similarity_matrix(sentences, stop_words):
    """Return the pairwise sentence-similarity matrix for ``sentences``.

    Entry ``[i][j]`` holds ``sentence_similarity(sentences[i], sentences[j],
    stop_words)`` for ``i != j``; the diagonal is left at zero.
    """
    count = len(sentences)
    matrix = np.zeros((count, count))
    for row, first in enumerate(sentences):
        for col, second in enumerate(sentences):
            # A sentence is never compared with itself.
            if row != col:
                matrix[row][col] = sentence_similarity(first, second, stop_words)
    return matrix
def sentences(text, top_n="auto"):
    """Build an extractive summary of ``text`` from its top-ranked sentences.

    Sentences are ranked by running PageRank over the sentence-similarity
    graph, and the best ``top_n`` are joined in rank order.

    Args:
        text: The text to summarise.
        top_n: Number of sentences to keep, or ``"auto"`` to keep all of
            them. Values larger than the number of available sentences are
            clamped rather than raising ``IndexError``.

    Returns:
        The selected sentences joined with ``". "`` and terminated by ``"."``.
    """
    # Step 1 - Clean text to generate tokenised sentences
    tokenized = clean_text(text)
    stop_words = stopwords.words('english') + [".", ","]

    # Step 2 - Generate similarity matrix across sentences
    similarity_matrix = build_similarity_matrix(tokenized, stop_words)

    # Step 3 - Rank sentences in the similarity graph
    similarity_graph = nx.from_numpy_array(similarity_matrix)
    scores = nx.pagerank(similarity_graph)

    # Step 4 - Sort by score (descending) and pick the top sentences
    ranked = sorted(
        ((scores[i], words) for i, words in enumerate(tokenized)),
        reverse=True,
    )
    limit = len(ranked) if top_n == "auto" else min(int(top_n), len(ranked))

    summary_sentences = []
    for _, words in ranked[:max(limit, 0)]:
        if not words:
            # Nothing to capitalise or emit for a sentence without tokens.
            continue
        first_word = words[0]
        # Upper-case only the first letter; str.capitalize() would also
        # lower-case the rest of the word (e.g. "NASA" -> "Nasa").
        leading = first_word[:1].upper() + first_word[1:]
        summary_sentences.append(" ".join([leading] + words[1:]))

    # Step 5 - Undo the punctuation padding added during cleaning
    joined = ". ".join(summary_sentences)
    return joined.replace(" , ", ", ").replace(" ' ", "'") + "."
def important_sentences(filepath, no_of_sentences=5):
    """OCR the image at ``filepath`` and list its most important sentences.

    Args:
        filepath: Path of the input image.
        no_of_sentences: How many top-ranked sentences to request.

    Returns:
        A 4-tuple of Gradio updates: the numbered sentence list as the new
        value of the first output, followed by three updates that hide the
        remaining outputs.
    """
    extracted_text = " ".join(read(filepath).split("\n"))
    try:
        extractive_summary = sentences(extracted_text, no_of_sentences)
    except Exception:
        # Best-effort fallback: if ranking with the requested count fails
        # (e.g. fewer sentences than requested), keep every sentence.
        # `Exception` rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate.
        extractive_summary = sentences(extracted_text, "auto")

    # Strip before testing for emptiness so blank fragments are neither
    # printed nor counted in the numbering.
    fragments = (part.strip() for part in extractive_summary.split("."))
    picked = [part for part in fragments if part]
    text = "".join(
        f"{number}. {sentence}.\n\n"
        for number, sentence in enumerate(picked, start=1)
    )

    # gr.update is component-agnostic, so the tuple only has to match the
    # order of the wired outputs, not their component classes.
    return (
        gr.update(value=text),
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
    )
def summarize(filepath):
    """OCR the image at ``filepath`` and return an abstractive summary.

    The summary length bounds are derived from the character count of the
    extracted text (one sixth as the maximum, one tenth as the minimum).

    Returns:
        A 4-tuple of Gradio updates: the summary as the new value of the
        first output, followed by three updates that hide the remaining
        outputs.
    """
    extracted_text = " ".join(read(filepath).split("\n"))

    if not extracted_text.strip():
        # OCR found no text: there is nothing to summarise, and both length
        # bounds would be zero.
        summary = ""
    else:
        length = len(extracted_text)
        result = summarizer(
            extracted_text,
            max_length=length // 6,
            min_length=length // 10,
            do_sample=False,
            # Cut over-long input down to the model's input limit instead of
            # failing on it.
            truncation=True,
        )
        summary = result[0]["summary_text"]

    return (
        gr.update(value=summary),
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
    )
# Question-answering pipelines that have already been built, keyed by model
# id, so each model is loaded at most once per process instead of on every
# request. Trade-off: a loaded model stays in memory.
_QA_PIPELINES = {}


def Question_Answer(filepath, question, mod):
    """Answer ``question`` using the text OCR'd from the image at ``filepath``.

    Args:
        filepath: Path of the input image.
        question: The question to answer.
        mod: ``"Roberta"`` selects the RoBERTa model; any other value
            selects the DistilBERT model.

    Returns:
        The ``'answer'`` field of the question-answering pipeline's result.
    """
    extracted_text = " ".join(read(filepath).split("\n"))

    if mod == "Roberta":
        model_id = "SMD00/QA_model-roberta"
    else:
        model_id = "SMD00/QA_model-distilbert"

    question_answerer = _QA_PIPELINES.get(model_id)
    if question_answerer is None:
        question_answerer = pipeline("question-answering", model=model_id)
        _QA_PIPELINES[model_id] = question_answerer

    obj = question_answerer(question=question, context=extracted_text)
    return obj['answer']
def show_fn():
    """Reveal three hidden components and clear a fourth.

    Returns a 4-tuple of Gradio updates: three that set ``visible=True`` and
    one that sets the value to ``""``. Generic ``gr.update`` objects are used
    so the tuple only has to match the order of the wired outputs, not their
    component classes (the component-specific updates used before were listed
    in a different order from the outputs they are wired to).
    """
    return (
        gr.update(visible=True),
        gr.update(visible=True),
        gr.update(visible=True),
        gr.update(value=""),
    )
def dummy_fn(x):
    """Identity function: hand back the argument unchanged."""
    passthrough = x
    return passthrough
# NOTE(review): the original indentation was lost, so the nesting below
# (which components sit inside each gr.Row, and launch() running after the
# Blocks context closes) is a best-guess reconstruction - confirm against the
# deployed layout.
with gr.Blocks() as demo:
    gr.Markdown("# **PicSum**")
    gr.Markdown("Gradio demo for PicSum project. You can give an image as input and select any of the three buttons. It generates summary, important sentences and answers questions related to context.")

    img = gr.components.Image(type="filepath", label="Input Image")

    # The three entry points of the app.
    with gr.Row():
        summary_btn = gr.Button(value="Summary")
        sentence_btn = gr.Button(value="Important Sentences")
        quesAndAns_btn = gr.Button(value="Question and Answers")

    # Question-answering controls, created hidden.
    mode = gr.Dropdown(
        ["Roberta", "DistilBert"],
        label="Model",
        info="Choose a model",
        visible=False,
    )
    ques_box = gr.Textbox(
        label="Question",
        info="Enter a Question",
        interactive=True,
        visible=False,
    )
    submit_btn = gr.Button(value="Submit", visible=False)

    out_box = gr.Textbox(label="Generated Text")

    # Event wiring: each handler's return tuple follows its `outputs` order.
    summary_btn.click(
        fn=summarize,
        inputs=[img],
        outputs=[out_box, submit_btn, ques_box, mode],
    )
    sentence_btn.click(
        fn=important_sentences,
        inputs=[img],
        outputs=[out_box, submit_btn, ques_box, mode],
    )
    quesAndAns_btn.click(
        fn=show_fn,
        outputs=[submit_btn, ques_box, mode, out_box],
    )
    submit_btn.click(
        fn=Question_Answer,
        inputs=[img, ques_box, mode],
        outputs=[out_box],
    )

    gr.Markdown("## Image Examples")
    with gr.Row():
        # One single-image Examples widget per sample file, in this order.
        for example_image in ("a.png", "b.png", "c.png", "d.png"):
            gr.Examples(
                examples=[example_image],
                inputs=img,
                outputs=img,
                fn=dummy_fn,
                cache_examples=True,
            )

demo.launch(debug=True)