# Hugging Face Space: PDF question-answering app (Gradio UI + LangChain retrieval).
# Standard library.
import os

# Third-party.
import gradio as gr
from dotenv import load_dotenv
from langchain import chains, llms, vectorstores
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from PyPDF2 import PdfReader

# Side effect at import time: read AI21_API_KEY (and any other settings)
# from a local .env file into the process environment.
load_dotenv()

# Module-level LLM client shared by every request; os.getenv returns None if
# the key is absent, in which case the AI21 client will fail at call time.
llm = llms.AI21(ai21_api_key=os.getenv('AI21_API_KEY'))
def process_pdf(pdf_file):
    """Build a conversational retrieval QA chain over the text of a PDF.

    Args:
        pdf_file: A path or file-like object accepted by PyPDF2's ``PdfReader``
            (Gradio passes the uploaded temp file here).

    Returns:
        A ``ConversationalRetrievalChain`` backed by a Chroma vector store of
        the PDF's text chunks, using the module-level ``llm``.
    """
    pdf_reader = PdfReader(pdf_file)
    # extract_text() may return None/"" for image-only pages — coalesce to ""
    # so the join cannot fail. str.join also avoids the quadratic cost of the
    # naive `texts += page_text` loop on large documents.
    texts = "".join(page.extract_text() or "" for page in pdf_reader.pages)
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=0,
    )
    chunks = text_splitter.split_text(texts)
    embeddings = HuggingFaceEmbeddings()
    # NOTE(review): this builds a fresh in-memory Chroma store on every call;
    # callers asking many questions about one file should cache the result.
    db = vectorstores.Chroma.from_texts(chunks, embeddings)
    retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 10})
    return chains.ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever)
def answer_question(pdf_file, question, chat_history):
    """Answer *question* about *pdf_file*, appending the turn to *chat_history*.

    Args:
        pdf_file: The uploaded file from ``gr.File`` (falsy when nothing is
            uploaded yet).
        question: The user's question text.
        chat_history: Mutable list of (question, answer) pairs held in
            ``gr.State``; mutated in place on success.

    Returns:
        The answer string, or a short prompt message when input is missing.
    """
    if not pdf_file:
        return "Please upload a PDF file first."
    # Robustness: don't send a blank question to the LLM.
    if not question or not question.strip():
        return "Please enter a question."
    # Cache the retrieval chain per uploaded file so repeated questions do not
    # re-read and re-embed the whole PDF on every click. gr.File uploads expose
    # .name (the temp-file path) — presumably stable per upload; TODO confirm.
    cache = answer_question.__dict__.setdefault("_qa_cache", {})
    key = getattr(pdf_file, "name", pdf_file)
    qa = cache.get(key)
    if qa is None:
        qa = cache[key] = process_pdf(pdf_file)
    result = qa({"question": question, "chat_history": chat_history})
    chat_history.append((question, result["answer"]))
    return result["answer"]
def main():
    """Assemble the Gradio UI and launch the app.

    Layout: a title, a row holding the file upload and the question box, an
    answer output, hidden chat-history state, and a submit button wired to
    ``answer_question``.
    """
    # NOTE(review): original indentation was lost in formatting; the widget
    # nesting below (upload + question inside one Row) is the reconstruction.
    with gr.Blocks() as demo:
        gr.Markdown("# PDF QA")
        with gr.Row():
            pdf_file = gr.File(label="Upload your PDF", file_types=[".pdf"])
            question = gr.Textbox(label="Ask a question about the PDF")
        output = gr.Textbox(label="Answer")
        chat_history = gr.State([])
        submit_btn = gr.Button("Submit")
        submit_btn.click(
            answer_question,
            inputs=[pdf_file, question, chat_history],
            outputs=output,
        )
    demo.launch()


if __name__ == "__main__":
    main()