# PicSum — Hugging Face Space (Gradio app).
# OCRs an uploaded image with Tesseract, then offers extractive summarisation
# (TextRank over a sentence-similarity graph), abstractive summarisation
# (facebook/bart-large-cnn) and question answering over the extracted text.
import gradio as gr
from PIL import Image
import pytesseract
import torch
import numpy as np
import nltk
nltk.download('stopwords')
nltk.download('punkt')
from nltk.corpus import stopwords
from nltk.cluster.util import cosine_distance
import networkx as nx
from transformers import pipeline
# Select GPU when available. NOTE(review): the original computed `device` but
# never used it, so the model always ran on CPU; pass it to the pipeline.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# Loaded once at startup; reused by summarize() for every request.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=device)
def read(filepath):
    """Run Tesseract OCR on the image at *filepath* and return the raw text."""
    image = Image.open(filepath)
    return pytesseract.image_to_string(image)
def clean_text(text):
    """Split raw text into sentences, each a list of word tokens.

    Sentences are delimited by '.'; commas and apostrophes are padded with
    spaces so they become standalone tokens (and can later be filtered as
    stopwords). Empty sentences and empty tokens are dropped.
    """
    tokenised = []
    for raw_sentence in text.split("."):
        if raw_sentence == "":
            continue
        spaced = raw_sentence.replace(",", " , ").replace("'", " ' ")
        tokens = [token for token in spaced.split(" ") if token]
        tokenised.append(tokens)
    return tokenised
def sentence_similarity(sent1, sent2, stopwords):
    """Cosine similarity between two tokenised sentences.

    Each sentence is count-encoded over the union of both sentences' words
    (stopwords contribute nothing), and the cosine of the two count vectors
    is returned. Returns 0 when either vector is all-zero (e.g. a sentence
    made entirely of stopwords), where a naive cosine would be NaN.

    Parameters: sent1/sent2 are lists of word tokens; stopwords is an
    iterable of words to ignore, or None.
    """
    if stopwords is None:
        stopwords = []
    stopword_set = set(stopwords)  # O(1) membership instead of list scans
    sent1 = [w.lower() for w in sent1]
    sent2 = [w.lower() for w in sent2]
    all_words = list(set(sent1 + sent2))
    # Precompute word -> position once instead of list.index() per token.
    position = {w: i for i, w in enumerate(all_words)}

    vector1 = np.zeros(len(all_words))
    vector2 = np.zeros(len(all_words))
    for w in sent1:
        if w not in stopword_set:
            vector1[position[w]] += 1
    for w in sent2:
        if w not in stopword_set:
            vector2[position[w]] += 1

    # Equivalent to 1 - nltk cosine_distance, computed once (the original
    # evaluated it twice) and without the NaN round-trip.
    norm_product = np.linalg.norm(vector1) * np.linalg.norm(vector2)
    if norm_product == 0:
        return 0
    return float(np.dot(vector1, vector2) / norm_product)
def build_similarity_matrix(sentences, stop_words):
    """Return an NxN matrix of pairwise sentence similarities.

    Entry [i][j] is sentence_similarity(sentences[i], sentences[j]);
    diagonal entries (a sentence vs. itself) are left at zero so the
    PageRank graph carries no self-loops.
    """
    n = len(sentences)
    similarity_matrix = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            if i == j:
                continue
            similarity_matrix[i][j] = sentence_similarity(sentences[i], sentences[j], stop_words)
    return similarity_matrix
def sentences(text, top_n="auto"):
    """Extractive summary: rank sentences with TextRank and join the top ones.

    Parameters
    ----------
    text : str
        Raw text to summarise.
    top_n : int or "auto"
        Number of top-ranked sentences to keep; "auto" keeps all of them.
        Values larger than the sentence count are clamped (the original
        raised IndexError, forcing callers to catch and retry).

    Returns
    -------
    str
        Selected sentences joined by ". " (token spacing around commas and
        apostrophes undone), terminated with a final period.
    """
    # Step 1 - Clean text into tokenised sentences.
    # (Renamed local: the original bound `sentences`, shadowing this function.)
    sentence_tokens = clean_text(text)
    stop_words = stopwords.words('english')
    stop_words.append(".")
    stop_words.append(",")
    summarize_text = []
    # Step 2 - Generate similarity matrix across sentences.
    sentence_similarity_matrix = build_similarity_matrix(sentence_tokens, stop_words)
    # Step 3 - Rank sentences: PageRank over the similarity graph.
    sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_matrix)
    scores = nx.pagerank(sentence_similarity_graph)
    # Step 4 - Sort by score (descending) and pick the top sentences.
    ranked_sentence = sorted(((scores[i], s) for i, s in enumerate(sentence_tokens)), reverse=True)
    if top_n == "auto":
        top_n = len(ranked_sentence)
    else:
        # Clamp so an over-large request cannot index past the end.
        top_n = min(int(top_n), len(ranked_sentence))
    for i in range(top_n):
        # Capitalise the first word of each selected sentence (in place).
        ranked_sentence[i][1][0] = ranked_sentence[i][1][0].capitalize()
        summarize_text.append(" ".join(ranked_sentence[i][1]))
    # Step 5 - Output the summarised text.
    extractive_summarized = ". ".join(summarize_text).replace(" , ", ", ").replace(" ' ", "'") + "."
    return extractive_summarized
def important_sentences(filepath, no_of_sentences=5):
    """OCR the image and return its top-ranked sentences as a numbered list.

    Returns a 4-tuple of gradio component updates: the generated text, plus
    updates hiding the Q&A submit button, question box and model dropdown.
    """
    extractedInformation = read(filepath)
    # Flatten OCR line breaks into a single running text.
    extractedInformation = ' '.join(extractedInformation.split('\n'))
    try:
        extractive_summary = sentences(extractedInformation, no_of_sentences)
    except Exception:
        # Was a bare `except:`; narrowed so Ctrl-C / SystemExit still
        # propagate. Fall back to "auto" (all sentences) if ranking with
        # the requested count fails.
        extractive_summary = sentences(extractedInformation, "auto")
    text = ""
    for index, sent in enumerate(extractive_summary.split(".")):
        if sent != '':
            text += str(index + 1) + ". " + str(sent).strip() + ".\n\n"
    return (gr.Textbox.update(text), gr.Button.update(visible=False),
            gr.Textbox.update(visible=False), gr.Dropdown.update(visible=False))
def summarize(filepath):
    """OCR the image and produce an abstractive summary with BART.

    Summary length bounds are scaled to roughly 1/6 (max) and 1/10 (min)
    of the extracted character count. Returns gradio component updates
    showing the summary and hiding the Q&A widgets.
    """
    extracted = read(filepath)
    extracted = ' '.join(extracted.split('\n'))
    upper_len = int(len(extracted) / 6)
    lower_len = int(len(extracted) / 10)
    result = summarizer(extracted, max_length=upper_len, min_length=lower_len, do_sample=False)
    return (
        gr.Textbox.update(result[0]["summary_text"]),
        gr.Button.update(visible=False),
        gr.Textbox.update(visible=False),
        gr.Dropdown.update(visible=False),
    )
def Question_Answer(filepath, question, mod):
    """Answer *question* using the OCR'd content of the image as context.

    mod selects the fine-tuned QA model ("Roberta", anything else maps to
    DistilBERT). Pipelines are cached on the function after first use so
    the model is not re-downloaded/re-loaded on every click (the original
    rebuilt the pipeline per call).
    """
    extractedInformation = read(filepath)
    extractedInformation = ' '.join(extractedInformation.split('\n'))
    if mod == "Roberta":
        model_name = "SMD00/QA_model-roberta"
    else:
        model_name = "SMD00/QA_model-distilbert"
    cache = getattr(Question_Answer, "_pipelines", None)
    if cache is None:
        cache = {}
        Question_Answer._pipelines = cache
    if model_name not in cache:
        cache[model_name] = pipeline("question-answering", model=model_name)
    obj = cache[model_name](question=question, context=extractedInformation)
    return obj['answer']
def show_fn():
    """Reveal the Q&A controls (question box, submit, dropdown) and clear the output box."""
    return (
        gr.Textbox.update(visible=True),
        gr.Button.update(visible=True),
        gr.Dropdown.update(visible=True),
        gr.Textbox.update(""),
    )
def dummy_fn(x):
    """Identity passthrough so gr.Examples can cache the example images."""
    return x
# UI layout and event wiring. (Fixed: the launch line carried a stray
# trailing "|" scrape artifact that made the file a syntax error.)
with gr.Blocks() as demo:
    gr.Markdown("# **PicSum**")
    gr.Markdown("Gradio demo for PicSum project. You can give an image as input and select any of the three buttons. It generates summary, important sentences and answers questions related to context.")
    # Input image delivered to handlers as a file path (for pytesseract).
    img = gr.components.Image(type="filepath", label="Input Image")
    with gr.Row():
        summary_btn = gr.Button(value="Summary")
        sentence_btn = gr.Button(value="Important Sentences")
        quesAndAns_btn = gr.Button(value="Question and Answers")
    # Q&A widgets start hidden; show_fn reveals them, the other handlers
    # hide them again via their 4-tuple of component updates.
    mode = gr.Dropdown(["Roberta", "DistilBert"], label="Model", info="Choose a model", visible=False)
    ques_box = gr.Textbox(label="Question", info="Enter a Question", interactive=True, visible=False)
    submit_btn = gr.Button(value="Submit", visible=False)
    out_box = gr.Textbox(label="Generated Text")
    summary_btn.click(fn=summarize, inputs=[img], outputs=[out_box, submit_btn, ques_box, mode])
    sentence_btn.click(fn=important_sentences, inputs=[img], outputs=[out_box, submit_btn, ques_box, mode])
    quesAndAns_btn.click(fn=show_fn, outputs=[submit_btn, ques_box, mode, out_box])
    submit_btn.click(fn=Question_Answer, inputs=[img, ques_box, mode], outputs=[out_box])
    gr.Markdown("## Image Examples")
    with gr.Row():
        # One cached Examples widget per sample image (was four copy-pasted blocks).
        for example_image in ("a.png", "b.png", "c.png", "d.png"):
            gr.Examples(
                examples=[example_image],
                inputs=img,
                outputs=img,
                fn=dummy_fn,
                cache_examples=True,
            )
demo.launch(debug=True)