Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
CHANGED
|
@@ -1,63 +1,53 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
):
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
value=0.95,
|
| 55 |
-
step=0.05,
|
| 56 |
-
label="Top-p (nucleus sampling)",
|
| 57 |
-
),
|
| 58 |
-
],
|
| 59 |
-
)
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
if __name__ == "__main__":
|
| 63 |
-
demo.launch()
|
|
|
|
| 1 |
import os

import fitz  # PyMuPDF
import gradio as gr
from langchain.chains import ConversationChain
from langchain.chains import ConversationalRetrievalChain
from langchain.chains import RetrievalQA
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
|
| 13 |
+
|
| 14 |
+
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of the PDF at *pdf_path*.

    Args:
        pdf_path: Path handed to ``fitz.open``.

    Returns:
        One string made of each page's ``get_text()`` output, in page order.
    """
    # The context manager closes the document even if a page fails to extract;
    # the previous version never closed it.
    with fitz.open(pdf_path) as doc:
        # Iterate pages directly and join once instead of growing a string
        # with `+=` inside an index loop.
        return "".join(page.get_text() for page in doc)
|
| 21 |
+
|
| 22 |
+
# Load the text from the PDF and preprocess
# The API key is looked up by environment-variable NAME; a literal secret must
# never be placed in source (and would not work as an argument to os.getenv).
# Any key that was ever committed to the repository should be revoked.
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    # Fail early with an actionable message instead of a later, opaque
    # authentication error from the OpenAI client.
    raise RuntimeError("OPENAI_API_KEY environment variable is not set")
pdf_path = "iess402.pdf"  # Path to your PDF file
pdf_text = extract_text_from_pdf(pdf_path)
# Wrap the whole PDF text in a single Document so the splitter can chunk it.
document = Document(page_content=pdf_text, metadata={})
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=25)
all_splits = text_splitter.split_documents([document])
|
| 29 |
+
|
| 30 |
+
# Create vector store and setup the QA chain
# Embed every chunk and index it in a Chroma vector store.
vectorstore = Chroma.from_documents(
    documents=all_splits,
    embedding=OpenAIEmbeddings(api_key=openai_api_key),
)
# temperature=0 is passed through to the completion model unchanged.
llm = OpenAI(
    api_key=openai_api_key,
    temperature=0,
    model="gpt-3.5-turbo-instruct",
    verbose=True,
)
# Prompt text with two placeholders: {context} and {question}.
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer.
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=template,
)
|
| 38 |
+
|
| 39 |
+
# Setup conversational retrieval chain with memory
# NOTE: `memory` is created once at module level, so every call that goes
# through `qa` shares the same chat history.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
retriever = vectorstore.as_retriever()
qa = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=retriever,
    memory=memory,
    # Pass the custom answer prompt to the document-combining step; without
    # this, QA_CHAIN_PROMPT is defined but never used by the chain.
    combine_docs_chain_kwargs={"prompt": QA_CHAIN_PROMPT},
)
|
| 43 |
+
|
| 44 |
+
# Define the function to ask questions and get answers
def ask_question(question):
    """Answer *question* using the module-level retrieval chain ``qa``.

    Args:
        question: The user's question text.

    Returns:
        The value stored under the ``'answer'`` key of the chain result, or a
        short prompt to enter a question when *question* is empty or only
        whitespace.
    """
    # Skip the retrieval/LLM round trip (and avoid recording an empty turn in
    # the chain's memory) when there is nothing to ask.
    if not question or not question.strip():
        return "Please enter a question."
    result = qa.invoke({"question": question})
    return result["answer"]
|
| 48 |
+
|
| 49 |
+
# Create the Gradio interface
iface = gr.Interface(
    fn=ask_question,
    inputs="text",
    outputs="text",
    title="PDF QA System",
    description="Ask questions based Textbook in Political Science for Class IX chapter 2.",
)

# Launch the Gradio interface only when this file is run as a script, so that
# importing the module does not start a web server as a side effect.
if __name__ == "__main__":
    iface.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|