Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| import langchain | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.embeddings.openai import OpenAIEmbeddings | |
| from langchain.vectorstores import Chroma | |
| from langchain import OpenAI, VectorDBQA | |
| from langchain.chains import RetrievalQAWithSourcesChain | |
| import PyPDF2 | |
import os  # needed for os.environ below; it is not among the imports above

# Read the OpenAI key from the environment once at startup. A missing variable
# raises KeyError here instead of surfacing later inside an API call.
api_key = os.environ["OPENAI_API_KEY"]
def read_and_textify(files):
    """Extract text from uploaded documents, one entry per page.

    PDFs are read page by page with PyPDF2. Plain-text files (``.txt``), which
    the uploader also accepts, are decoded as UTF-8 and treated as a single
    page.

    Args:
        files: Iterable of uploaded file objects. Each must expose ``name``.
            PDFs must be readable by ``PyPDF2.PdfReader``; text files must
            provide ``getvalue()`` or ``read()``.

    Returns:
        A two-item list ``[text_list, sources_list]`` of equal length, where
        ``sources_list[i]`` is ``"<file name>_page_<index>"`` (0-based) for
        ``text_list[i]``.
    """
    text_list = []
    sources_list = []
    for file in files:
        if file.name.lower().endswith(".txt"):
            # PdfReader cannot parse plain text, so handle it separately.
            # getvalue() returns the whole buffer regardless of the current
            # stream position; fall back to read() for other file-likes.
            raw = file.getvalue() if hasattr(file, "getvalue") else file.read()
            if isinstance(raw, bytes):
                raw = raw.decode("utf-8", errors="replace")
            text_list.append(raw)
            sources_list.append(file.name + "_page_0")
            continue

        pdf_reader = PyPDF2.PdfReader(file)
        for page_index, page in enumerate(pdf_reader.pages):
            text = page.extract_text()
            # NOTE(review): clear() empties the page object once its text has
            # been extracted, presumably to release memory -- confirm it is
            # still needed.
            page.clear()
            text_list.append(text)
            sources_list.append(file.name + "_page_" + str(page_index))
    return [text_list, sources_list]
# Page chrome.
st.set_page_config(layout="centered", page_title="Multidoc_QnA")
st.header("Multidoc_QnA")
st.write("---")

# With accept_multiple_files=True the uploader yields a list that is empty
# (never None) until something has been uploaded.
uploaded_files = st.file_uploader(
    "Upload documents", accept_multiple_files=True, type=["txt", "pdf"]
)
st.write("---")

if not uploaded_files:
    st.info("Upload files to analyse")
else:
    st.write(str(len(uploaded_files)) + " document(s) loaded..")

    # Page texts plus a parallel list of source labels, one per page.
    documents, sources = read_and_textify(uploaded_files)

    # Embed every page and index it; the label is stored as "source" metadata
    # so the QA chain can report which pages an answer came from.
    embeddings = OpenAIEmbeddings(openai_api_key=api_key)
    vStore = Chroma.from_texts(
        documents, embeddings, metadatas=[{"source": s} for s in sources]
    )

    # Model selection.
    model_name = "gpt-3.5-turbo"
    # model_name = "gpt-4"

    # Only the two closest pages are passed to the model as context.
    retriever = vStore.as_retriever(search_kwargs={"k": 2})

    llm = OpenAI(model_name=model_name, openai_api_key=api_key, streaming=True)
    model = RetrievalQAWithSourcesChain.from_chain_type(
        llm=llm, chain_type="stuff", retriever=retriever
    )

    st.header("Ask your data")
    user_q = st.text_area("Enter your questions here")

    if st.button("Get Response"):
        if not user_q.strip():
            # Avoid sending an empty question to the model.
            st.warning("Please enter a question first.")
        else:
            # Top-level UI boundary: show any failure to the user instead of
            # crashing the app.
            try:
                with st.spinner("Model is working on it..."):
                    result = model({"question": user_q}, return_only_outputs=True)
                st.subheader('Your response:')
                st.write(result['answer'])
                st.subheader('Source pages:')
                st.write(result['sources'])
            except Exception as e:
                st.error(f"An error occurred: {e}")
                st.error('Oops, the GPT response resulted in an error :( Please try again with a different question.')
| # import gradio as gr | |
| # import streamlit as st | |
| # from langchain.embeddings.openai import OpenAIEmbeddings | |
| # from langchain.text_splitter import CharacterTextSplitter | |
| # from langchain.vectorstores import Chroma | |
| # from langchain.chains import ConversationalRetrievalChain | |
| # from langchain.chat_models import ChatOpenAI | |
| # from langchain.document_loaders import PyPDFLoader | |
| # import os | |
| # import fitz | |
| # from PIL import Image | |
| # # Global variables | |
| # COUNT, N = 0, 0 | |
| # chat_history = [] | |
| # chain = None # Initialize chain as None | |
| # # Function to set the OpenAI API key | |
| # api_key = os.environ['OPENAI_API_KEY'] | |
| # st.write(api_key) | |
| # # Function to enable the API key input box | |
| # def enable_api_box(): | |
| # return enable_box | |
| # # Function to add text to the chat history | |
| # def add_text(history, text): | |
| # if not text: | |
| # raise gr.Error('Enter text') | |
| # history = history + [(text, '')] | |
| # return history | |
| # # Function to process the PDF file and create a conversation chain | |
| # def process_file(file): | |
| # global chain | |
| # if 'OPENAI_API_KEY' not in os.environ: | |
| # raise gr.Error('Upload your OpenAI API key') | |
| # # Replace with your actual PDF processing logic | |
| # loader = PyPDFLoader(file.name) | |
| # documents = loader.load() | |
| # embeddings = OpenAIEmbeddings() | |
| # pdfsearch = Chroma.from_documents(documents, embeddings) | |
| # chain = ConversationalRetrievalChain.from_llm(ChatOpenAI(temperature=0.3), | |
| # retriever=pdfsearch.as_retriever(search_kwargs={"k": 1}), | |
| # return_source_documents=True) | |
| # return chain | |
| # # Function to generate a response based on the chat history and query | |
| # def generate_response(history, query, pdf_upload): | |
| # global COUNT, N, chat_history, chain | |
| # if not pdf_upload: | |
| # raise gr.Error(message='Upload a PDF') | |
| # if COUNT == 0: | |
| # chain = process_file(pdf_upload) | |
| # COUNT += 1 | |
| # # Replace with your LangChain logic to generate a response | |
| # result = chain({"question": query, 'chat_history': chat_history}, return_only_outputs=True) | |
| # chat_history += [(query, result["answer"])] | |
| # N = list(result['source_documents'][0])[1][1]['page'] # Adjust as needed | |
| # for char in result['answer']: | |
| # history[-1][-1] += char | |
| # return history, '' | |
| # # Function to render a specific page of a PDF file as an image | |
| # def render_file(file): | |
| # global N | |
| # doc = fitz.open(file.name) | |
| # page = doc[N] | |
| # pix = page.get_pixmap(matrix=fitz.Matrix(300/72, 300/72)) | |
| # image = Image.frombytes('RGB', [pix.width, pix.height], pix.samples) | |
| # return image | |
| # # Function to render initial content from the PDF | |
| # def render_first(pdf_file): | |
| # # Replace with logic to process the PDF and generate an initial image | |
| # image = Image.new('RGB', (600, 400), color = 'white') # Placeholder | |
| # return image | |
| # # Streamlit & Gradio Interface | |
| # st.title("PDF-Powered Chatbot") | |
| # with st.container(): | |
| # gr.Markdown(""" | |
| # <style> | |
| # .image-container { height: 680px; } | |
| # </style> | |
| # """) | |
| # with gr.Blocks() as demo: | |
| # pdf_upload1 = gr.UploadButton("📁 Upload PDF 1", file_types=[".pdf"]) # Define pdf_upload1 | |
| # # ... (rest of your interface creation) | |
| # txt = gr.Textbox(label="Enter your query", placeholder="Ask a question...") | |
| # submit_btn = gr.Button('Submit') | |
| # @submit_btn.click() | |
| # def on_submit(): | |
| # add_text(chatbot, txt) | |
| # generate_response(chatbot, txt, pdf_upload1) # Use pdf_upload1 here | |
| # render_file(pdf_upload1) # Use pdf_upload1 here | |
| # if __name__ == "__main__": | |
| # gr.Interface( | |
| # fn=generate_response, | |
| # inputs=[ | |
| # "file", # Define pdf_upload1 | |
| # "text", # Define chatbot output | |
| # "text" # Define txt | |
| # ], | |
| # outputs=[ | |
| # "image", # Define show_img | |
| # "text", # Define chatbot output | |
| # "text" # Define txt | |
| # ], | |
| # title="PDF-Powered Chatbot" | |
| # ).launch() | |