Spaces:

SMD00
/

Image_Summarizer

Runtime error

App Files Community

SMD00 committed on Jun 23, 2023

Commit

055dfd2

1 Parent(s): df0e9bd

Create app.py

Browse files

Files changed (1) hide show

app.py +170 -0

app.py ADDED Viewed

	@@ -0,0 +1,170 @@

+import gradio as gr
+from PIL import Image
+import os
+import pytesseract
+import torch
+import numpy as np
+import nltk
+nltk.download('stopwords')
+nltk.download('punkt')
+from nltk.corpus import stopwords
+from nltk.tokenize import word_tokenize, sent_tokenize
+from nltk.cluster.util import cosine_distance
+import networkx as nx
+from transformers import pipeline
+if torch.cuda.is_available():
+   device = torch.device("cuda")
+else:
+   device = torch.device("cpu")
+summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+def read(filepath):
+    return pytesseract.image_to_string(Image.open(filepath))
+def clean_text(text):
+  article = text.split(".")
+  article=[sentence for sentence in article if sentence!=""]
+  # print(article)
+  sentences = []
+  for sentence in article:
+      #print(sentence)
+      sentence=sentence.replace(",", " , ").replace("'", " ' ").split(" ")
+      #sentence=sentence.replace("[^a-zA-Z]", " ").split(" ")
+      sentence=[word for word in sentence if word!=""]
+      sentences.append(sentence)
+  return sentences
+def sentence_similarity(sent1, sent2, stopwords):   #Creating words in sentences to one hot encoding and then finding cosine distance between the vectors inorder to measure closeness
+    if stopwords is None:
+        stopwords = []
+    sent1 = [w.lower() for w in sent1]
+    sent2 = [w.lower() for w in sent2]
+    all_words = list(set(sent1 + sent2))
+    vector1 = [0] * len(all_words)
+    vector2 = [0] * len(all_words)
+    # build the vector for the first sentence
+    for w in sent1:
+        if w in stopwords:
+            continue
+        vector1[all_words.index(w)] += 1
+    # build the vector for the second sentence
+    for w in sent2:
+        if w in stopwords:
+            continue
+        vector2[all_words.index(w)] += 1
+    return 1 - cosine_distance(vector1, vector2)
+def build_similarity_matrix(sentences, stop_words):
+    # Create an empty similarity matrix
+    similarity_matrix = np.zeros((len(sentences), len(sentences)))
+    for idx1 in range(len(sentences)):
+        for idx2 in range(len(sentences)):
+            if idx1 == idx2: #ignore if both are same sentences
+                continue
+            similarity_matrix[idx1][idx2] = sentence_similarity(sentences[idx1], sentences[idx2], stop_words)
+    return similarity_matrix
+def sentences(text, top_n='2'):
+    if top_n==  None or top_n=="":
+      top_n=2
+    top_n=int(top_n)
+    # Step 1 - Clean text to generate sentences
+    sentences=clean_text(text)
+    stop_words = stopwords.words('english')
+    stop_words.append(".")
+    stop_words.append(",")
+    summarize_text = []
+    # Step 2 - Generate Similary Martix across sentences
+    sentence_similarity_martix = build_similarity_matrix(sentences, stop_words)
+    # print(sentence_similarity_martix)
+    # Step 3 - Rank sentences in similarity martix
+    sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_martix)
+    # print(sentence_similarity_graph)
+    scores = nx.pagerank(sentence_similarity_graph)
+    # print(scores)
+    # Step 4 - Sort the rank and pick top sentences
+    ranked_sentence = sorted(((scores[i],s) for i,s in enumerate(sentences)), reverse=True)    #Sorting the scores in decending order
+    # print("Indexes of top ranked_sentence order are ", ranked_sentence)
+    for i in range(top_n):
+      ranked_sentence[i][1][0]=ranked_sentence[i][1][0].capitalize()    #Capitalising 1st letter of sentence
+      # print(ranked_sentence[i][1][0])
+      summarize_text.append(" ".join(ranked_sentence[i][1]))
+    # Step 5 - Offcourse, output the summarized text
+    extractive_summarized=". ".join(summarize_text).replace(" , ",", ").replace(" ' ","'") + "."
+    return extractive_summarized
+def important_sentences(filepath, no_of_sentences=5):
+  extractedInformation=read(filepath)
+  extractedInformation=' '.join(extractedInformation.split('\n'))
+  extractive_summary=sentences(extractedInformation, no_of_sentences)
+  text=""
+  for index,sent in enumerate(extractive_summary.split(".")):
+    if sent!='':text+=str(index+1)+". "+str(sent).strip()+".\n\n"
+  return (gr.Textbox.update(text),gr.Button.update(visible=False),gr.Textbox.update(visible=False))
+def summarize(filepath):
+  extractedInformation=read(filepath)
+  extractedInformation=' '.join(extractedInformation.split('\n'))
+  output = summarizer(extractedInformation, max_length=int(len(extractedInformation)/6), min_length=int(len(extractedInformation)/10), do_sample=False)
+  return (gr.Textbox.update(output[0]["summary_text"]),gr.Button.update(visible=False),gr.Textbox.update(visible=False))
+def Question_Answer(filepath,question):
+  extractedInformation=read(filepath)
+  extractedInformation=' '.join(extractedInformation.split('\n'))
+  question_answerer = pipeline("question-answering", model="SMD00/QA_model-roberta")
+  obj=question_answerer(question=question, context=extractedInformation)
+  return obj['answer']
+def show_fn():
+    return (gr.Textbox.update(visible=True),gr.Button.update(visible=True),gr.Textbox.update(""))
+# Build the Gradio UI. Components are created in the order in which they are
+# laid out, so the statement order below is significant.
+with gr.Blocks() as demo:
+    gr.Markdown("# **PicSum**")
+    gr.Markdown("Gradio demo for PicSum project. You can give an image as input and select any of the three buttons. It generates summary, important sentences and answers questions related to context.")
+    # type="filepath" hands the handlers a path to the uploaded image.
+    img=gr.components.Image(type="filepath", label="Input Image")
+    # One button per feature, placed side by side.
+    with gr.Row():
+        summary = gr.Button(value="Summary")
+        sentence = gr.Button(value="Important Sentences")
+        quesAndAns = gr.Button(value="Question and Answers")
+    # Question controls start hidden (visible=False).
+    ques_box = gr.Textbox(label="Question",interactive=True,visible=False)
+    submit= gr.Button(value="Submit",visible=False)
+    out=gr.Textbox(label="Generated Text")
+    # Each handler returns one update per listed output, in the same order.
+    summary.click(fn=summarize,inputs=[img],outputs=[out,submit,ques_box])
+    sentence.click(fn=important_sentences,inputs=[img],outputs=[out,submit,ques_box])
+    # NOTE(review): the outputs here are ordered [submit, ques_box, out];
+    # show_fn must return its updates in that same order.
+    quesAndAns.click(fn=show_fn,outputs=[submit,ques_box,out])
+    submit.click(fn=Question_Answer,inputs=[img,ques_box],outputs=[out])
+# Start the app; runs when this module is executed or imported.
+demo.launch(debug=True)