# Hugging Face Space: PDF Question/Answer app (Gradio + LangChain).
| """## Import necessary libraries""" | |
| import os | |
| import shutil | |
| import json | |
| from langchain.document_loaders import PyPDFLoader | |
| from langchain.document_loaders import PyPDFDirectoryLoader | |
| from langchain.llms import OpenAI | |
| from langchain.prompts import PromptTemplate | |
| from langchain.chains import LLMChain | |
| from langchain.output_parsers import PydanticOutputParser | |
| from pydantic import BaseModel, Field | |
| from langchain.document_loaders import YoutubeLoader | |
| from langchain.document_loaders import WebBaseLoader | |
| from langchain.text_splitter import CharacterTextSplitter | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.embeddings.openai import OpenAIEmbeddings | |
| from langchain.vectorstores import Chroma | |
| from langchain.chat_models import ChatOpenAI | |
| from langchain.chains import RetrievalQA | |
| #from google.colab import drive | |
| from google.oauth2 import service_account | |
| from google.cloud import translate_v2 as translate | |
| import gradio as gr | |
| """## Access KEY""" | |
| #ACCESS_KEY = os.environ.get("ACCESS_KEY") | |
| service_account_info = json.loads(os.environ.get("SERVICE_ACCOUNT_FILE")) | |
| credentials = service_account.Credentials.from_service_account_info(service_account_info) | |
| """ ## Load PDF """ | |
| class LoadPdf: | |
| def __init__(self, pdf_file): | |
| if not self.is_pdf_file(pdf_file): | |
| raise gr.Error("Invalid file extension. Please load a PDF file") | |
| self.pdf_file = pdf_file | |
| def is_pdf_file(self, file_path): | |
| _, file_extension = os.path.splitext(file_path) | |
| return file_extension.lower() == ".pdf" | |
| def read_file(self): | |
| loader = PyPDFLoader(self.pdf_file) | |
| data = loader.load() | |
| return data | |
| """## Request OpenAI""" | |
| class QuestionAnswer: | |
| def __init__(self, data, question, user_key): | |
| self.data = data | |
| self.question = question | |
| self.key = user_key | |
| def make_qa(self): | |
| #Splitter | |
| text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100) | |
| splits = text_splitter.split_documents(self.data) | |
| #Persist dir | |
| persist_directory = 'files/chroma/' | |
| #EMbedings | |
| embedding = OpenAIEmbeddings(openai_api_key=self.key) | |
| retriever = Chroma.from_documents(documents=splits, | |
| embedding=embedding, | |
| persist_directory=persist_directory).as_retriever() | |
| # initialize the LLM | |
| llm = ChatOpenAI(temperature=0.2, model="gpt-3.5-turbo-16k", openai_api_key=self.key) | |
| question_answer = RetrievalQA.from_chain_type(llm=llm, retriever=retriever) | |
| make_question = f'{self.question}' | |
| return question_answer.run(make_question) | |
| """## Translation""" | |
| class TranslateOutput: | |
| def __init__(self, credentials): | |
| self.credentials = credentials | |
| def list_languages(self): | |
| client = translate.client.Client(credentials=self.credentials) | |
| languages = client.get_languages() | |
| language_names = [language['name'] for language in languages] | |
| return language_names | |
| def all_languages(self): | |
| client = translate.client.Client(credentials=self.credentials) | |
| languages = client.get_languages() | |
| return languages | |
| def translate_text(self, text, target_language): | |
| client = translate.client.Client(target_language=target_language, credentials=self.credentials) | |
| if isinstance(text, bytes): | |
| text = text.decode("utf-8") | |
| result = client.translate(text, target_language=target_language) | |
| return result["translatedText"] | |
| """## Run QA """ | |
| def run_qa(files,checkboxes,question,language,user_key): | |
| #secret_key = os.environ.get("SECRET_KEY") | |
| if user_key is None: | |
| return 'Introduza OpenAI API KEY' | |
| full_filenames = [file.name for file in files] | |
| available_files = [os.path.basename(path) for path in full_filenames] | |
| chosen_files = checkboxes | |
| # Filter files that are both available and chosen | |
| loadable_files = [file for file in available_files if file in chosen_files] | |
| # debug messages | |
| print(f"=> Available Files: {str(available_files)}") | |
| print(f"=> Chosen Files: {str(chosen_files)}") | |
| print(f"=> Question for Files: {str(question)}") | |
| print(f"=> Language to use: {str(language)}") | |
| # clear data | |
| data='' | |
| # Load files | |
| for file in loadable_files: | |
| print(f"=> Loading chosen file: {str(file)}") | |
| pdf_loader = LoadPdf("pdfs/"+file) | |
| data = pdf_loader.read_file() | |
| # Run the model | |
| qa = QuestionAnswer(data, question, user_key) | |
| answer_open_ai = qa.make_qa() | |
| # Translate output | |
| language_selected = language | |
| translate_output = TranslateOutput(credentials) | |
| for i in translate_output.all_languages(): | |
| if i['name'] == language_selected: | |
| iso_code = i['language'] | |
| break | |
| print(f"=> Answer OpenAI: {answer_open_ai}") | |
| print(f"=> Target Language IsoCode: {iso_code}") | |
| answer = translate_output.translate_text(answer_open_ai, target_language=iso_code) | |
| print(f"=> Translated Answer OpenAI: {answer}") | |
| return answer | |
# Define a function to be called when files are uploaded
def on_files_upload(files):
    """Copy uploaded files into ./pdfs and refresh the checkbox choices.

    files -- Gradio file objects; each has a `.name` temp-file path.
    Returns an updated gr.CheckboxGroup listing the uploaded base names.
    """
    # save files to files dir (exist_ok makes the pre-check redundant)
    files_dir = "pdfs"
    os.makedirs(files_dir, exist_ok=True)
    filenames = []
    for fileobj in files:
        # BUG FIX: os.path.basename() needs the path STRING; the original
        # passed the file object itself (`fileobj`), raising a TypeError.
        name = os.path.basename(fileobj.name)
        shutil.copyfile(fileobj.name, os.path.join(files_dir, name))
        filenames.append(name)
    # checkbox group update
    return gr.CheckboxGroup(choices=filenames)
# Define a function to be called when files are cleared
def on_files_cleared():
    """Remove the local pdfs directory and empty the checkbox choices."""
    if os.path.exists("pdfs"):
        shutil.rmtree("pdfs")
    return gr.CheckboxGroup(choices=[])
# Define the Gradio interface
title = "Question/Answer over Documents"
subtitle = "OpenAI GPT 3.5 Turbo LLM assisted Question/Answer over multiple PDF context documents"
authors = "Hugo Cavalaria "
custom_layout = "<h1>{}</h1><h2>{}</h2><p>{}</p>".format(title, subtitle, authors)
# Get the list of languages available.
# FIX: dropped the redundant identity comprehension `[i for i in ...]` --
# list_languages() already returns a list of names.
translate_output = TranslateOutput(credentials)
language_names = translate_output.list_languages()
# Gradio Interface
with gr.Blocks() as interface:
    # Header banner: title / subtitle / author.
    with gr.Row():
        with gr.Column(scale=2):
            gr.HTML(custom_layout)
    # Main area: inputs on the left, answer and button on the right.
    with gr.Row():
        with gr.Column(scale=1):
            upload_pdfs = gr.Files(label="Upload multiple PDF files.",
                                   interactive=True,
                                   file_types=['.pdf'],
                                   container=True)
            checkbox_group = gr.CheckboxGroup(label="Select the files to question.",
                                              choices=[],
                                              interactive=True)
            question_text = gr.Textbox(label="Question:")
            answer_language = gr.Dropdown(label="Answer translation to:",
                                          choices=language_names,
                                          value="Portuguese")
            secret_key = gr.Textbox(label="OpenAI API Key:")
        with gr.Column(scale=1):
            output_status = gr.Textbox(label="Answer:")
            btn = gr.Button("Ask")
    # Event wiring: ask button, file upload, and file clear.
    btn.click(fn=run_qa,
              inputs=[upload_pdfs, checkbox_group, question_text, answer_language, secret_key],
              outputs=[output_status])
    upload_pdfs.upload(fn=on_files_upload,
                       inputs=[upload_pdfs],
                       outputs=[checkbox_group],
                       show_progress="full")
    upload_pdfs.clear(fn=on_files_cleared,
                      inputs=None,
                      outputs=[checkbox_group])
| """## Launch Interface""" | |
| # launch interface | |
| if __name__ == "__main__": | |
| interface.launch(share=False, debug=True) |