Spaces:

autosummproject
/

autosumm

Runtime error

App Files Community

mhsvieira committed on Dec 1, 2021

Commit

2b3e58c

1 Parent(s): 30ce4c0

UI improvements

Browse files

Files changed (3) hide show

AutoSumm.png +0 -0
app.py +36 -7
extractor/_utils.py +22 -5

AutoSumm.png ADDED Viewed

app.py CHANGED Viewed

@@ -29,23 +29,43 @@ def main():
     search_model, summ_model, tokenizer = init()
     Timer.reset()
-    st.title("AutoSumm")
     st.subheader("Lucas Antunes & Matheus Vieira")
     portuguese = st.checkbox('Traduzir para o português.')
     if portuguese:
         environ['PORTUGUESE'] = 'true' # work around (gambiarra)
-        st.subheader("Digite o tópico sobre o qual você deseja gerar um resumo")
-        query_pt = st.text_input('Digite o tópico') #text is stored in this variable
         button = st.button('Gerar resumo')
     else:
         environ['PORTUGUESE'] = 'false' # work around (gambiarra)
-        st.subheader("Type the desired topic to generate the summary")
-        query = st.text_input('Type your topic') #text is stored in this variable
         button = st.button('Generate summary')
-    result = st.empty()
     if 'few_documents' not in st.session_state:
         st.session_state['few_documents'] = False
@@ -68,22 +88,31 @@ def main():
             if portuguese:
                 result.markdown(f'Seu resumo para "{query_pt}":\n\n> {translate(summary, "en", "pt")}')
             else:
                 result.markdown(f'Your summary for "{query}":\n\n> {summary}')
             Timer.show_total()
     if few_documents:
         st.warning(st.session_state['msg'])
-        if st.button('Prosseguir'):
             text = extract(query, search_model=search_model, extracted_documents=st.session_state['documents'])
             summary = summarize(text, summ_model, tokenizer)
             if portuguese:
                 result.markdown(f'Seu resumo para "{query_pt}":\n\n> {translate(summary, "en", "pt")}')
             else:
                 result.markdown(f'Your summary for "{query}":\n\n> {summary}')
             st.session_state['few_documents'] = False
             few_documents = False

     search_model, summ_model, tokenizer = init()
     Timer.reset()
+    _, col2, _ = st.columns([1,1,1])
+    col2.image('AutoSumm.png', width=250)
     st.subheader("Lucas Antunes & Matheus Vieira")
     portuguese = st.checkbox('Traduzir para o português.')
+    st.sidebar.markdown("""
+    # Processing steps
+    #### Translation
+    Step where the system translates the user's query from Portuguese to English and the summary from English to Portuguese.
+    #### Corpus generation
+    Step where the system generates the complete corpus: query-related web pages and documents (PDFs and text files) on query-related knowledge area. The Corpus for this model was built to gather documents related to the Blue Amazon, a maritime region in South America.
+    #### Exhaustive search
+    Step where the system filters the texts of the corpus that contain keywords from the query.
+    #### Semantic search over documents
+    Step in which the system selects documents related to the query through semantic search.
+    #### Semantic search over paragraphs
+    Step in which the system breaks documents into paragraphs and selects those related to the query through semantic search.
+    #### Abstraction
+    Step in which the system generates an abstractive summary about the query from the best three paragraphs of the previous step.
+    """)
     if portuguese:
         environ['PORTUGUESE'] = 'true' # work around (gambiarra)
+        query_pt = st.text_input('Digite o tópico sobre o qual você deseja gerar um resumo') #text is stored in this variable
         button = st.button('Gerar resumo')
     else:
         environ['PORTUGUESE'] = 'false' # work around (gambiarra)
+        query = st.text_input('Type the desired topic to generate the summary') #text is stored in this variable
         button = st.button('Generate summary')
+    result = st.container()
     if 'few_documents' not in st.session_state:
         st.session_state['few_documents'] = False
             if portuguese:
                 result.markdown(f'Seu resumo para "{query_pt}":\n\n> {translate(summary, "en", "pt")}')
+                with result.expander(f'Parágrafos usados na geração do resumo'):
+                    st.markdown(translate(text, "en", "pt").replace('\n', '\n\n'))
             else:
                 result.markdown(f'Your summary for "{query}":\n\n> {summary}')
+                with result.expander(f'Paragraphs used in summarization'):
+                    st.markdown(text.replace('\n', '\n\n'))
             Timer.show_total()
     if few_documents:
         st.warning(st.session_state['msg'])
+        msg = 'Prosseguir' if portuguese else 'Proceed'
+        if st.button(msg):
             text = extract(query, search_model=search_model, extracted_documents=st.session_state['documents'])
             summary = summarize(text, summ_model, tokenizer)
             if portuguese:
                 result.markdown(f'Seu resumo para "{query_pt}":\n\n> {translate(summary, "en", "pt")}')
+                with result.expander(f'Parágrafos usados na geração do resumo'):
+                    st.markdown(translate(text, "en", "pt").replace('\n', '\n\n'))
             else:
                 result.markdown(f'Your summary for "{query}":\n\n> {summary}')
+                with result.expander(f'Paragraphs used in summarization'):
+                    st.markdown(text.replace('\n', '\n\n'))
             st.session_state['few_documents'] = False
             few_documents = False

extractor/_utils.py CHANGED Viewed

@@ -3,6 +3,7 @@ import numpy as np
 import streamlit as st
 # import inflect
 import torch
 # p = inflect.engine()
@@ -23,6 +24,13 @@ def document_extraction(dataset, query, keywords, min_document_size, min_just_on
   lower_query = query.lower()
   lower_keywords = [keyword.lower() for keyword in keywords]
   documents = {}
   documents['QUERY'] = [
@@ -61,7 +69,10 @@ def document_extraction(dataset, query, keywords, min_document_size, min_just_on
   if all(empty.values()):
         # TODO: throw error
         st.info(empty.values())
-        st.warning(f'No document found for the query "{query}", please try with another query')
         st.stop()
   if sizes['QUERY'] >= 10:
@@ -72,10 +83,16 @@ def document_extraction(dataset, query, keywords, min_document_size, min_just_on
       extracted_documents = documents['OR']
   else:
       number_of_documents = sizes['OR']
-      raise FewDocumentsError(documents['OR'], number_of_documents,
-        f'Only {number_of_documents} documents found for the query "{query}"\n\
-        Please select continue to proceed with {number_of_documents} documents or try again with another query'
-      )
   return extracted_documents, empty, sizes

 import streamlit as st
 # import inflect
 import torch
+from os import environ
 # p = inflect.engine()
   lower_query = query.lower()
   lower_keywords = [keyword.lower() for keyword in keywords]
+  if environ['PORTUGUESE'] == 'true':
+    portuguese = True
+  elif environ['PORTUGUESE'] == 'false':
+    portuguese = False
+  else:
+    raise EnvironmentError
   documents = {}
   documents['QUERY'] = [
   if all(empty.values()):
         # TODO: throw error
         st.info(empty.values())
+        if portuguese:
+          st.warning(f'Nenhum documento encontrado para a query "{query}", por favor, tente com outra query')
+        else:
+          st.warning(f'No document found for the query "{query}", please try with another query')
         st.stop()
   if sizes['QUERY'] >= 10:
       extracted_documents = documents['OR']
   else:
       number_of_documents = sizes['OR']
+      if portuguese:
+        raise FewDocumentsError(documents['OR'], number_of_documents,
+          f'Somente {number_of_documents} documentos encontrados para a query "{query}"\n\
+          Por favor selecione "Prosseguir" para prosseguir com {number_of_documents} documentos ou tente novamente com outra query'
+        )
+      else:
+        raise FewDocumentsError(documents['OR'], number_of_documents,
+          f'Only {number_of_documents} documents found for the query "{query}"\n\
+          Please select "Proceed" to proceed with {number_of_documents} documents or try again with another query'
+        )
   return extracted_documents, empty, sizes