Spaces:
Sleeping
Sleeping
import os

import gradio as gr
from dotenv import load_dotenv
from langchain import chains, llms, vectorstores
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from PyPDF2 import PdfReader

# Load variables from a local .env file so the AI21 key never lives in
# source control (the previous revision embedded the key as a literal).
load_dotenv()

# Module-level LLM shared by every request; expects AI21_API_KEY in the
# environment (e.g. set in .env).
llm = llms.AI21(ai21_api_key=os.getenv("AI21_API_KEY"))
def pdf_qa(pdf, query):
    """Answer a question about the contents of an uploaded PDF.

    Parameters
    ----------
    pdf :
        The uploaded PDF from the Gradio ``File`` component (may be
        ``None`` when nothing was uploaded).
    query : str
        The user's question (may be empty).

    Returns
    -------
    str
        The model's answer, or a prompt asking for the missing input.
    """
    # Guard clause: bail out before doing any expensive work.  The
    # original built the embeddings and vector store even when the
    # query was empty, only to return this same message.
    if pdf is None or not query:
        return "Please upload a PDF and enter a query."

    pdf_reader = PdfReader(pdf)
    # extract_text() returns None for pages with no extractable text
    # (e.g. scanned images); coalesce to "" so join() never sees None.
    # str.join avoids the quadratic += concatenation of the original.
    texts = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=0,
    )
    chunks = text_splitter.split_text(texts)

    # NOTE(review): embeddings + vector store are rebuilt on every call.
    # Acceptable for a demo; cache per-PDF if latency becomes a problem.
    embeddings = HuggingFaceEmbeddings()
    db = vectorstores.Chroma.from_texts(chunks, embeddings)
    retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 10})

    qa = chains.ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever)
    # Single-turn usage: no conversation state is carried between calls.
    result = qa({"question": query, "chat_history": []})
    return result["answer"]
# --- Gradio UI ------------------------------------------------------------
with gr.Blocks() as demo:
    # Inputs: the PDF to index and the question to ask about it.
    pdf_input = gr.File(label="Upload your PDF", type="file")
    query_input = gr.Textbox(label="Ask a question in PDF")

    # Output textbox that receives the chain's answer.
    output = gr.Textbox(label="Answer")

    # Wire the submit button to the QA function defined above.
    submit_button = gr.Button("Submit")
    submit_button.click(
        fn=pdf_qa,
        inputs=[pdf_input, query_input],
        outputs=output,
    )

demo.launch()