Spaces:
Runtime error
Runtime error
File size: 4,597 Bytes
370ba10 7bf14b3 370ba10 238e05a 370ba10 498197b 370ba10 9caf390 370ba10 523a530 370ba10 238e05a 370ba10 f52d6dc 370ba10 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
# Application setup: Streamlit page configuration, environment wiring and the
# module-level imports shared by the helpers below.
import streamlit as st
# Configure the page right after importing Streamlit; this is the first st.* call in the file.
st.set_page_config(layout="wide")
import warnings
# Suppress warnings whose message mentions "ScriptRunContext".
warnings.filterwarnings("ignore", message=".*ScriptRunContext.*")
from annotated_text import annotated_text, annotation
import fitz
import os
import uuid
from pathlib import Path
import time
# Copy the key stored under OPEN_API_KEY to OPENAI_API_KEY.
# NOTE(review): os.environ[...] raises KeyError at startup when OPEN_API_KEY is unset.
os.environ['OPENAI_API_KEY'] = os.environ['OPEN_API_KEY']
st.title("Contracts Summary ")
import pandas as pd
from langchain.retrievers import EnsembleRetriever
from langchain_community.retrievers import BM25Retriever
from langchain.schema import Document
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
import spacy
# Load the English model from SpaCy
from spacy_download import load_spacy
nlp = load_spacy("en_core_web_sm")
def util_upload_file_and_return_list_docs(uploaded_files):
    """Save each uploaded file into the working directory and open it with PyMuPDF.

    Args:
        uploaded_files: Iterable of upload objects exposing ``name`` and
            ``getvalue()`` (the two members used below).

    Returns:
        A ``(list_docs, list_save_path)`` tuple holding the opened ``fitz``
        documents and the paths they were written to, in upload order.
    """
    list_docs = []
    list_save_path = []
    working_dir = Path(os.getcwd())
    for uploaded_file in uploaded_files:
        # The file name is supplied by the client: keep only its final path
        # component so an absolute or "../"-style name cannot make the write
        # land outside the working directory.
        safe_name = Path(uploaded_file.name).name
        save_path = working_dir / safe_name
        with open(save_path, mode='wb') as w:
            w.write(uploaded_file.getvalue())
        list_docs.append(fitz.open(save_path))
        list_save_path.append(save_path)
    return (list_docs, list_save_path)
def util_get_list_page_and_passage(list_docs, list_save_path):
    """Return the full text of every document, one string per document.

    Each element of ``list_docs`` is iterated page by page and the result of
    every page's ``get_text()`` is concatenated in page order.
    ``list_save_path`` is accepted but not used.
    """
    return [
        "".join(page.get_text() for page in pdf)
        for pdf in list_docs
    ]
# Module-level default; rebound further down once submitted uploads have been read.
documents = []
def get_summary_single_doc(text):
    """Summarize one document's text with a LangChain "refine" chain.

    The text is split on newlines into chunks (``chunk_size=3000``,
    ``chunk_overlap=20``); the first chunk is summarized and every following
    chunk is offered to the model to refine that summary.

    Args:
        text: Full plain text of a single document.

    Returns:
        The final summary string, or a short notice when ``text`` is empty or
        contains only whitespace (e.g. a scanned PDF without a text layer).
    """
    # Empty text produces zero chunks and the refine chain needs at least one
    # chunk to start from, so answer directly instead of failing.
    if not text or not text.strip():
        return "No extractable text found in this document."

    # NOTE(review): fixed pause before every request - presumably crude rate
    # limiting; confirm it is still needed.
    time.sleep(5)

    from langchain.chains.summarize import load_summarize_chain
    from langchain.chat_models import ChatOpenAI
    from langchain.prompts import PromptTemplate
    from langchain.text_splitter import CharacterTextSplitter

    api_key = os.environ.get("OPEN_API_KEY")

    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=3000,
        chunk_overlap=20,
    )
    # Wrap the chunks as Document objects for the chain.
    texts = text_splitter.create_documents([text])

    # Prompt used for the first chunk.
    prompt_template = (
        "Write a concise summary of the following:\n"
        "{text}\n"
        "CONCISE SUMMARY:"
    )
    prompt = PromptTemplate.from_template(prompt_template)

    # Prompt used for every later chunk. Adjacent literals are concatenated
    # verbatim, so each piece carries its own trailing space or newline.
    refine_template = (
        "Your job is to produce a final summary with key learnings\n"
        "We have provided an existing summary up to a certain point: {existing_answer}\n"
        "We have the opportunity to refine the existing summary "
        "(only if needed) with detailed context below.\n"
        "------------\n"
        "{text}\n"
        "------------\n"
        "Given the new context, refine the original summary.\n"
        "If the context isn't useful, return the original summary."
    )
    refine_prompt = PromptTemplate.from_template(refine_template)

    model_name = "gpt-4o-mini"
    llm = ChatOpenAI(temperature=0, openai_api_key=api_key, model_name=model_name)
    refine_chain = load_summarize_chain(
        llm,
        chain_type="refine",
        question_prompt=prompt,
        refine_prompt=refine_prompt,
        return_intermediate_steps=True,
    )
    refine_outputs = refine_chain({'input_documents': texts})
    return refine_outputs['output_text']
# Upload form: shows usage instructions, collects PDF uploads and, on submit,
# prints one summary per uploaded document.
with st.form("my_form"):
    multi = (
        "1. Download and Upload contract (PDF) .\n"
        "e.g. https://www.barc.gov.in/tenders/GCC-LPS.pdf\n"
        "e.g. https://www.montrosecounty.net/DocumentCenter/View/823/Sample-Construction-Contract\n"
    )
    st.markdown(multi)
    multi = '''2. Press Summary .'''
    st.markdown(multi)
    multi = "\n** Attempt is made for summary ** \n\n"
    st.markdown(multi)

    list_docs = []
    list_save_path = []
    uploaded_files = st.file_uploader("Choose file(s)", accept_multiple_files=True)
    submitted = st.form_submit_button("Summary")

    if submitted:
        # With accept_multiple_files=True the uploader yields a list, so "no
        # upload" is an empty list rather than None: test truthiness.
        if uploaded_files:
            list_docs, list_save_path = util_upload_file_and_return_list_docs(uploaded_files)
            documents = util_get_list_page_and_passage(list_docs, list_save_path)
            for index, item in enumerate(documents):
                st.write(f"Summary{index + 1} :: ")
                st.write(get_summary_single_doc(item))
        else:
            st.warning("Please upload at least one PDF before pressing Summary.")
|