manoj1hcl commited on
Commit
9edbb70
·
verified ·
1 Parent(s): 865a13d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +113 -0
app.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pdfplumber
3
+ from dotenv import load_dotenv
4
+ import gradio as gr
5
+ from langchain_openai import OpenAIEmbeddings, ChatOpenAI
6
+ from langchain_chroma import Chroma
7
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
8
+ from langchain.chains import RetrievalQA
9
+ from langchain.llms import OpenAI
10
+ from langchain.document_loaders import TextLoader
11
+ from langchain.docstore.document import Document
12
+ from transformers import AutoTokenizer
13
+ from langchain.document_loaders import PyPDFLoader
14
+ from langchain.memory import ConversationBufferMemory
15
+ from langchain.chains import ConversationalRetrievalChain
16
+
17
+
18
+
19
# price is a factor for our company, so we're going to use a low cost model
MODEL = "gpt-4o-mini"

# Directory name for the persistent Chroma vector store.
db_name = "vector_db"

# Load environment variables from a .env file; override=True lets .env values
# win over anything already set in the shell environment.
load_dotenv(override=True)

# Re-export the key so downstream libraries that read the environment find it.
# NOTE(review): when the key is absent this writes a placeholder string into
# the environment — OpenAI calls will then fail with an auth error, not here.
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')
27
+
28
def process_pdf(pdf_file):
    """Build a conversational retrieval chain from an uploaded PDF.

    Loads the PDF, splits it into overlapping chunks, embeds the chunks
    into a fresh Chroma vector store, and wires the store to a chat model
    with conversation memory.

    Args:
        pdf_file: Uploaded-file object exposing a ``.name`` filesystem path
            (as provided by ``gr.File``).

    Returns:
        A ``ConversationalRetrievalChain`` ready to answer questions about
        the document.
    """
    loader = PyPDFLoader(pdf_file.name)
    pages = loader.load()

    # Small chunks with overlap keep retrieval granular without losing context.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50
    )
    chunks = text_splitter.split_documents(pages)
    embeddings = OpenAIEmbeddings()

    # Drop any stale collection so a re-upload doesn't mix old and new documents.
    if os.path.exists(db_name):
        Chroma(persist_directory=db_name, embedding_function=embeddings).delete_collection()

    # Embed the chunks with OpenAI Embeddings into a persistent Chroma store.
    vectorstore = Chroma.from_documents(documents=chunks, embedding=embeddings, persist_directory=db_name)

    # Log the embedding dimensionality as a sanity check.
    collection = vectorstore._collection
    sample_embedding = collection.get(limit=1, include=["embeddings"])["embeddings"][0]
    dimensions = len(sample_embedding)
    print(f"The vectors have {dimensions:,} dimensions")

    # Create the OpenAI chat model used for answering.
    # BUGFIX: removed the leftover smoke-test call
    # `llm.invoke([HumanMessage(...)])` — `HumanMessage` is never imported in
    # this file, so every upload crashed with a NameError (and the call also
    # burned an API request per upload).
    llm = ChatOpenAI(temperature=0.7, model=MODEL)

    # Conversation memory so follow-up questions keep their context.
    memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

    # Retriever backed by the vector store.
    retriever = vectorstore.as_retriever()

    # Tie model, retriever, and memory into one conversational chain.
    conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

    return conversation_chain
68
+
69
# Gradio callback: build (or clear) the global QA chain when a file is uploaded.
def upload_pdf(file):
    """Handle a file-upload event from the Gradio ``gr.File`` widget.

    Sets the module-level ``chain`` to a fresh conversational chain built
    from the file, or clears it when the upload is removed.

    Args:
        file: The uploaded file object, or ``None`` when the upload is cleared.

    Returns:
        A human-readable status message for the UI.
    """
    global chain
    if file is None:
        chain = None
        # BUGFIX: corrected the typo "pleae" in the user-facing message.
        return "Please upload a file!"
    chain = process_pdf(file)
    return "Processed the file. You can now ask questions."
77
+
78
# Gradio callback: answer a question against the currently loaded PDF chain.
def ask_question(question):
    """Answer *question* using the global conversational retrieval chain.

    Args:
        question: The user's question text.

    Returns:
        The chain's answer string, or a prompt to upload a PDF first.
    """
    # Guard clause: no chain means no PDF has been processed yet.
    if chain is None:
        return "upload the pdf first"

    # Run the chain; ConversationalRetrievalChain expects a "question" key.
    response = chain.invoke({"question": question})

    # Chains may return a dict with an "answer" key or some raw value —
    # fall back to the string form when the shape isn't the expected dict.
    if isinstance(response, dict) and "answer" in response:
        return response["answer"]
    return str(response)
93
+
94
# ---------------------------------------------------------------------------
# Gradio interface: upload a PDF, then chat with it.
# ---------------------------------------------------------------------------
chain = None  # global QA chain, populated by upload_pdf()

with gr.Blocks() as demo:
    gr.Markdown("## Chat with your pdf!!")

    # File uploader
    file_input = gr.File(label="Upload your PDF 📄", file_types=[".pdf"])

    # Status line plus question/answer widgets.
    status = gr.Textbox(label="Status", interactive=False)
    question = gr.Textbox(label="Ask a question about the PDF")
    answer = gr.Textbox(label="Answer")
    ask_button = gr.Button("Ask")

    # Wire events: a new upload rebuilds the chain; the button asks a question.
    file_input.change(upload_pdf, inputs=[file_input], outputs=[status])
    ask_button.click(ask_question, inputs=[question], outputs=[answer])

# Launch the app in a separate browser tab rather than inline.
demo.launch(inline=False)