Spaces:

scholarly360
/

contracts-summary

Runtime error

App Files Files Community

scholarly360 committed on Aug 4, 2024

Commit

370ba10

verified ·

1 Parent(s): 670a315

Create app.py

Browse files

Files changed (1) hide show

app.py +130 -0

app.py ADDED Viewed

	@@ -0,0 +1,130 @@

+import streamlit as st
+st.set_page_config(layout="wide")
+from annotated_text import annotated_text, annotation
+import fitz
+import os
+import chromadb
+import uuid
+from pathlib import Path
+os.environ['OPENAI_API_KEY'] = os.environ['OPEN_API_KEY']
+st.title("Contracts Summary ")
+import pandas as pd
+from langchain.retrievers import BM25Retriever, EnsembleRetriever
+from langchain.schema import Document
+from langchain.vectorstores import Chroma
+from langchain.embeddings import HuggingFaceEmbeddings
+import spacy
+# Load the English model from SpaCy
+# NOTE(review): `nlp` is not referenced anywhere else in the visible file.
+# Confirm it is still needed; this load runs on every script execution and
+# presumably fails at startup if the "en_core_web_md" model is not installed.
+nlp = spacy.load("en_core_web_md")
+def util_upload_file_and_return_list_docs(uploaded_files):
+    #util_del_cwd()
+    list_docs = []
+    list_save_path = []
+    for uploaded_file in uploaded_files:
+        save_path = Path(os.getcwd(), uploaded_file.name)
+        with open(save_path, mode='wb') as w:
+            w.write(uploaded_file.getvalue())
+        #print('save_path:', save_path)
+        docs = fitz.open(save_path)
+        list_docs.append(docs)
+        list_save_path.append(save_path)
+    return(list_docs, list_save_path)
+def util_get_list_page_and_passage(list_docs, list_save_path):
+    #page_documents = []
+    documents = []
+    for ind_doc, docs in enumerate(list_docs):
+        text = ''
+        for txt_index, txt_page in enumerate(docs):
+            text = text + txt_page.get_text()
+        documents.append(text)
+    return(documents)
+# Module-level default for the extracted document texts; it is rebound inside
+# the form block below once files have been submitted.
+documents = []
+def get_summary_single_doc(text):
+    from langchain.llms import OpenAI
+    from langchain.chains.summarize import load_summarize_chain
+    from langchain.text_splitter import CharacterTextSplitter
+    from langchain.prompts import PromptTemplate
+    from langchain.llms import OpenAI
+    from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+    LLM_KEY=os.environ.get("OPEN_API_KEY")
+    text_splitter = CharacterTextSplitter(
+            separator="\n",
+            chunk_size=3000,
+            chunk_overlap=20
+        )
+    #create the documents from list of texts
+    texts = text_splitter.create_documents([text])
+    prompt_template = """Write a concise summary of the following:
+    {text}
+    CONCISE SUMMARY:"""
+    prompt = PromptTemplate.from_template(prompt_template)
+    refine_template = (
+        "Your job is to produce a final summary with key learnings\n"
+        "We have provided an existing summary up to a certain point: {existing_answer}\n"
+        "We have the opportunity to refine the existing summary"
+        "(only if needed) with detailed context below.\n"
+        "------------\n"
+        "{text}\n"
+        "------------\n"
+        "Given the new context, refine the original summary"
+        "If the context isn't useful, return the original summary."
+    )
+    refine_prompt = PromptTemplate.from_template(refine_template)
+    #Define the LLM
+    # here we are using OpenAI's ChatGPT
+    from langchain.chat_models import ChatOpenAI
+    model_name = "gpt-3.5-turbo"
+    llm=ChatOpenAI(temperature=0, openai_api_key=LLM_KEY, model_name=model_name)
+    refine_chain = load_summarize_chain(
+        llm,
+        chain_type="refine",
+        question_prompt=prompt,
+        refine_prompt=refine_prompt,
+        return_intermediate_steps=True,
+    )
+    refine_outputs = refine_chain({'input_documents': texts})
+    return(refine_outputs['output_text'])
+with st.form("my_form"):
+    multi = '''1. Download and Upload contract (PDF) .
+    e.g. https://www.barc.gov.in/tenders/GCC-LPS.pdf
+    e.g. https://www.montrosecounty.net/DocumentCenter/View/823/Sample-Construction-Contract
+    '''
+    st.markdown(multi)
+    multi = '''2. Press Summary .'''
+    st.markdown(multi)
+    multi = '''
+    ** Attempt is made for summary ** \n
+    '''
+    st.markdown(multi)
+    #uploaded_file = st.file_uploader("Choose a file")
+    list_docs = []
+    list_save_path = []
+    uploaded_files = st.file_uploader("Choose file(s)", accept_multiple_files=True)
+    submitted = st.form_submit_button("Summary")
+    if submitted and (uploaded_files is not None):
+        list_docs, list_save_path = util_upload_file_and_return_list_docs(uploaded_files)
+        documents = util_get_list_page_and_passage(list_docs, list_save_path)
+        for index, item in enumerate(documents):
+            st.write('Summary' + str(index+1) +  ' :: ')
+            st.write(get_summary_single_doc(item))