nikhmr1235 committed on
Commit
79ff6ae
·
verified ·
1 Parent(s): bc68e2a

Attempt to fix KeyError: 7 on the global state object

Browse files

The additional_inputs parameter of gr.ChatInterface does not directly support gr.State objects in the way they are being used here.
Instead, we need to encapsulate the state and the handler functions within a class.

Files changed (1) hide show
  1. app.py +88 -121
app.py CHANGED
@@ -15,132 +15,105 @@ import tempfile
15
  # Constants
16
  LLM_MODEL = "gemini-1.5-flash"
17
  EMBEDDING_MODEL = "BAAI/bge-large-en-v1.5"
18
- CHROMA_DB_PATH = tempfile.gettempdir() + "/chroma_db"
19
 
20
- # Set the Google API key from environment variables
21
- # This is the recommended way to handle secrets in Hugging Face Spaces
22
- if "GOOGLE_API_KEY" not in os.environ:
23
- gr.Error("Please set the GOOGLE_API_KEY environment variable in your Hugging Face Space secrets.")
24
- else:
25
- os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY")
26
-
27
- # Global state to hold session data
28
- class SessionState:
29
  def __init__(self):
30
- self.session_id = str(uuid.uuid4())
31
- self.db = None
32
- self.vector_store_path = os.path.join(CHROMA_DB_PATH, self.session_id)
33
-
34
- def is_db_ready(self):
35
- return self.db is not None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
- # Helper function to generate a new session state
38
- def new_session():
39
- return SessionState()
40
 
41
- # Function to handle PDF upload and ingestion
42
- def process_pdf(pdf_file, state):
43
- try:
44
- # Check if a PDF has already been processed in this session
45
- if state and state.is_db_ready():
46
  return (
47
  gr.update(interactive=False),
48
- gr.update(visible=True),
49
- state
50
  )
51
-
52
- # File size validation
53
- file_size_mb = os.path.getsize(pdf_file.name) / (1024 * 1024)
54
- if file_size_mb >= 75:
55
- gr.Error("File size exceeds the 75 MB limit. Please upload a smaller PDF.")
56
  return (
57
  gr.update(interactive=True),
58
- gr.update(visible=False),
59
- state
60
  )
61
 
62
- # If a file is uploaded, a new session should be started
63
- # The new session object is returned to update the state
64
- new_state = new_session()
65
-
66
- # Extract text from the PDF using PyMuPDF (fitz)
67
- doc = fitz.open(pdf_file.name)
68
- text = ""
69
- for page in doc:
70
- text += doc.get_text()
71
- doc.close()
72
-
73
- # Split text into chunks
74
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
75
- docs = text_splitter.create_documents([text])
76
-
77
- # Create a ChromaDB vector store from the documents
78
- embeddings = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL)
79
- new_state.db = Chroma.from_documents(
80
- documents=docs,
81
- embedding=embeddings,
82
- persist_directory=new_state.vector_store_path
83
  )
84
 
85
- gr.Info("PDF processed successfully! You can now ask questions about the document.")
86
-
87
- # Return updates to the UI components and the new state
88
- return (
89
- gr.update(interactive=False),
90
- gr.update(visible=True),
91
- new_state
92
- )
93
- except Exception as e:
94
- # Clean up the directory in case of an error
95
- if state and os.path.exists(state.vector_store_path):
96
- shutil.rmtree(state.vector_store_path)
97
- gr.Error(f"An error occurred: {str(e)}")
98
- # Re-enable the file upload in case of error
99
- return (
100
- gr.update(interactive=True),
101
- gr.update(visible=False),
102
- state
103
  )
104
 
105
- # Function to handle user queries
106
- def chat_with_pdf(message, history, state):
107
- # Add a defensive check for the state object itself
108
- if not state or not state.is_db_ready():
109
- yield "Please upload a PDF first to begin the conversation."
110
- return
111
-
112
- # Use the ChromaDB instance from the session state
113
- retriever = state.db.as_retriever()
114
-
115
- # Set up the RAG chain
116
- llm = ChatGoogleGenerativeAI(model=LLM_MODEL, temperature=0.7)
117
-
118
- prompt_template = PromptTemplate(
119
- template="""
120
- You are a helpful assistant for a PDF document.
121
- Answer the user's question based on the following context.
122
- If you don't know the answer, just say that you don't know, don't try to make up an answer.
123
- ----------------
124
- Context: {context}
125
- Question: {question}
126
- """,
127
- input_variables=["context", "question"],
128
- )
129
 
130
- rag_chain = (
131
- {"context": retriever, "question": RunnablePassthrough()}
132
- | prompt_template
133
- | llm
134
- | StrOutputParser()
135
- )
136
 
137
- response = rag_chain.invoke(message)
138
- yield response
 
 
 
 
139
 
140
- # Gradio Interface
141
  with gr.Blocks(title="PDF Chatbot") as demo:
142
- # Corrected: Initialize gr.State with the object returned by the function
143
- state = gr.State(new_session())
144
 
145
  gr.Markdown(
146
  """
@@ -148,34 +121,28 @@ with gr.Blocks(title="PDF Chatbot") as demo:
148
  Upload a PDF to start a conversation with your document.
149
  """
150
  )
151
-
152
  with gr.Row():
153
  file_upload_input = gr.File(
154
  file_types=[".pdf"],
155
  label="Upload your PDF document",
156
  interactive=True
157
  )
158
-
159
- # Use gr.ChatInterface as a top-level component that wraps the chat logic
160
  chat_interface = gr.ChatInterface(
161
- fn=chat_with_pdf,
162
  chatbot=gr.Chatbot(type="messages"),
163
  textbox=gr.Textbox(placeholder="Type your question here...", scale=7),
164
  examples=[["What is the main topic of the document?"], ["Summarize the key findings."], ["Who are the authors?"]],
165
  title="Chat Interface",
166
  theme="soft",
167
- # Fix: Add the state as an additional input to the ChatInterface
168
- additional_inputs=[state]
169
  )
170
 
171
- # Initially hide the chat interface until a file is processed
172
- chat_interface.visible = False
173
-
174
- # Event handler for file upload
175
  file_upload_input.upload(
176
- fn=process_pdf,
177
- inputs=[file_upload_input, state],
178
- outputs=[file_upload_input, chat_interface, state]
179
  )
180
 
181
- demo.launch()
 
15
  # Constants
16
  LLM_MODEL = "gemini-1.5-flash"
17
  EMBEDDING_MODEL = "BAAI/bge-large-en-v1.5"
18
+ CHROMA_DB_PATH = tempfile.gettempdir() + "/chroma_db"
19
 
20
+ class PDFChatbot:
 
 
 
 
 
 
 
 
21
  def __init__(self):
22
+ self.state = SessionState()
23
+
24
+ def process_pdf(self, pdf_file):
25
+ try:
26
+ if self.state.is_db_ready():
27
+ return (
28
+ gr.update(interactive=False),
29
+ gr.update(visible=True)
30
+ )
31
+
32
+ file_size_mb = os.path.getsize(pdf_file.name) / (1024 * 1024)
33
+ if file_size_mb >= 75:
34
+ gr.Error("File size exceeds the 75 MB limit. Please upload a smaller PDF.")
35
+ return (
36
+ gr.update(interactive=True),
37
+ gr.update(visible=False)
38
+ )
39
+
40
+ self.state = SessionState()
41
+ doc = fitz.open(pdf_file.name)
42
+ text = ""
43
+ for page in doc:
44
+ text += page.get_text()
45
+ doc.close()
46
+
47
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
48
+ docs = text_splitter.create_documents([text])
49
+
50
+ embeddings = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL)
51
+ self.state.db = Chroma.from_documents(
52
+ documents=docs,
53
+ embedding=embeddings,
54
+ persist_directory=self.state.vector_store_path
55
+ )
56
 
57
+ gr.Info("PDF processed successfully! You can now ask questions about the document.")
 
 
58
 
 
 
 
 
 
59
  return (
60
  gr.update(interactive=False),
61
+ gr.update(visible=True)
 
62
  )
63
+ except Exception as e:
64
+ if os.path.exists(self.state.vector_store_path):
65
+ shutil.rmtree(self.state.vector_store_path)
66
+ gr.Error(f"An error occurred: {str(e)}")
 
67
  return (
68
  gr.update(interactive=True),
69
+ gr.update(visible=False)
 
70
  )
71
 
72
+ def chat_with_pdf(self, message, history):
73
+ if not self.state.is_db_ready():
74
+ yield "Please upload a PDF first to begin the conversation."
75
+ return
76
+
77
+ retriever = self.state.db.as_retriever()
78
+ llm = ChatGoogleGenerativeAI(model=LLM_MODEL, temperature=0.7)
79
+
80
+ prompt_template = PromptTemplate(
81
+ template="""
82
+ You are a helpful assistant for a PDF document.
83
+ Answer the user's question based on the following context.
84
+ If you don't know the answer, just say that you don't know, don't try to make up an answer.
85
+ ----------------
86
+ Context: {context}
87
+ Question: {question}
88
+ """,
89
+ input_variables=["context", "question"],
 
 
 
90
  )
91
 
92
+ rag_chain = (
93
+ {"context": retriever, "question": RunnablePassthrough()}
94
+ | prompt_template
95
+ | llm
96
+ | StrOutputParser()
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  )
98
 
99
+ response = rag_chain.invoke(message)
100
+ yield response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
+ class SessionState:
103
+ def __init__(self):
104
+ self.session_id = str(uuid.uuid4())
105
+ self.db = None
106
+ self.vector_store_path = os.path.join(CHROMA_DB_PATH, self.session_id)
 
107
 
108
+ def is_db_ready(self):
109
+ return self.db is not None
110
+
111
+ # Set the Google API key from environment variables
112
+ if "GOOGLE_API_KEY" not in os.environ:
113
+ raise Exception("Please set the GOOGLE_API_KEY environment variable.")
114
 
 
115
  with gr.Blocks(title="PDF Chatbot") as demo:
116
+ chatbot = PDFChatbot()
 
117
 
118
  gr.Markdown(
119
  """
 
121
  Upload a PDF to start a conversation with your document.
122
  """
123
  )
124
+
125
  with gr.Row():
126
  file_upload_input = gr.File(
127
  file_types=[".pdf"],
128
  label="Upload your PDF document",
129
  interactive=True
130
  )
131
+
 
132
  chat_interface = gr.ChatInterface(
133
+ fn=chatbot.chat_with_pdf,
134
  chatbot=gr.Chatbot(type="messages"),
135
  textbox=gr.Textbox(placeholder="Type your question here...", scale=7),
136
  examples=[["What is the main topic of the document?"], ["Summarize the key findings."], ["Who are the authors?"]],
137
  title="Chat Interface",
138
  theme="soft",
139
+ visible=False
 
140
  )
141
 
 
 
 
 
142
  file_upload_input.upload(
143
+ fn=chatbot.process_pdf,
144
+ inputs=[file_upload_input],
145
+ outputs=[file_upload_input, chat_interface]
146
  )
147
 
148
+ demo.launch()