Spaces:
Sleeping
Sleeping
Hasnan Ramadhan
committed on
Commit
·
5673dfd
1
Parent(s):
49d345d
fixing bug
Browse files
app.py
CHANGED
|
@@ -5,15 +5,19 @@ from langchain_community.document_loaders import PyMuPDFLoader
|
|
| 5 |
import requests
|
| 6 |
from groq import Groq
|
| 7 |
import os
|
| 8 |
-
|
| 9 |
import tempfile
|
| 10 |
from googlesearch import search
|
| 11 |
from bs4 import BeautifulSoup
|
| 12 |
from urllib.parse import urljoin, urlparse
|
| 13 |
import re
|
| 14 |
|
| 15 |
-
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
class DocumentState(TypedDict):
|
| 18 |
documents: list[dict]
|
| 19 |
summaries: list[str]
|
|
@@ -299,6 +303,108 @@ def process_pdf_and_chat(pdf_file, message, history, system_message, max_tokens,
|
|
| 299 |
except Exception as e:
|
| 300 |
return history + [(message, f"Error processing PDF: {str(e)}")]
|
| 301 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 302 |
def respond(message, history, system_message, max_tokens, temperature, top_p, enable_search=False):
|
| 303 |
"""Enhanced chat function with optional Google search"""
|
| 304 |
if enable_search:
|
|
@@ -347,20 +453,20 @@ with gr.Blocks() as demo:
|
|
| 347 |
top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
|
| 348 |
|
| 349 |
with gr.Column(scale=2):
|
| 350 |
-
chatbot = gr.Chatbot()
|
| 351 |
msg = gr.Textbox(label="Message")
|
| 352 |
clear = gr.Button("Clear")
|
| 353 |
|
| 354 |
def user_input(message, history):
|
| 355 |
-
return "", history + [
|
| 356 |
|
| 357 |
def bot_response(history, pdf_file, enable_search, system_message, max_tokens, temperature, top_p):
|
| 358 |
-
message = history[-1][
|
| 359 |
if pdf_file is not None:
|
| 360 |
-
|
| 361 |
else:
|
| 362 |
-
|
| 363 |
-
return
|
| 364 |
|
| 365 |
msg.submit(user_input, [msg, chatbot], [msg, chatbot], queue=False).then(
|
| 366 |
bot_response, [chatbot, pdf_upload, enable_search, system_message, max_tokens, temperature, top_p], chatbot
|
|
@@ -368,4 +474,4 @@ with gr.Blocks() as demo:
|
|
| 368 |
clear.click(lambda: None, None, chatbot, queue=False)
|
| 369 |
|
| 370 |
if __name__ == "__main__":
|
| 371 |
-
demo.launch()
|
|
|
|
| 5 |
import requests
|
| 6 |
from groq import Groq
|
| 7 |
import os
|
| 8 |
+
from dotenv import load_dotenv
|
| 9 |
import tempfile
|
| 10 |
from googlesearch import search
|
| 11 |
from bs4 import BeautifulSoup
|
| 12 |
from urllib.parse import urljoin, urlparse
|
| 13 |
import re
|
| 14 |
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
# Read a local .env file (if any) before the key lookup below.
# NOTE(review): assumes python-dotenv's default search behaviour — confirm.
load_dotenv()

# Warn, but keep starting up, when the Groq key is unset or empty.
if os.environ.get("GROQ_API_KEY", "") == "":
    print("Warning: GROQ_API_KEY not found in environment variables")
|
| 21 |
class DocumentState(TypedDict):
|
| 22 |
documents: list[dict]
|
| 23 |
summaries: list[str]
|
|
|
|
| 303 |
except Exception as e:
|
| 304 |
return history + [(message, f"Error processing PDF: {str(e)}")]
|
| 305 |
|
| 306 |
+
def respond_messages(message, history, system_message, max_tokens, temperature, top_p, enable_search=False):
    """Produce the assistant's reply text for a chat turn without a PDF.

    When ``enable_search`` is truthy the search graph is invoked with
    ``message`` as the query and its first summary (if any) is returned.
    In every other case the reply comes from ``get_llm_response`` called on
    the system message followed by the user message.

    ``history``, ``max_tokens``, ``temperature`` and ``top_p`` are accepted
    for signature compatibility but are not used here.

    Returns:
        The reply as a plain string (not a history list).
    """
    if enable_search:
        workflow = create_search_graph()
        outcome = workflow.invoke({
            'documents': [],
            'summaries': [],
            'search_results': [],
            'search_query': message,
            # NOTE(review): the PDF flow passes True here — confirm False is intended.
            'needs_search': False,
        })
        if outcome['summaries']:
            return outcome['summaries'][0]

    # Search disabled, or it produced no summaries: ask the LLM directly.
    plain_prompt = f"{system_message}\n\nUser: {message}"
    return get_llm_response(plain_prompt)["response"]
|
| 335 |
+
|
| 336 |
+
def _pdf_to_local_path(pdf_file):
    """Return ``(path, is_temporary)`` for an uploaded PDF.

    Path-like values are used in place and reported as not temporary, so the
    caller never deletes a file it did not create. Raw bytes and objects
    exposing ``read()`` are copied into a fresh ``.pdf`` temp file.
    """
    if isinstance(pdf_file, (str, os.PathLike)):
        return os.fspath(pdf_file), False

    if isinstance(pdf_file, (bytes, bytearray)):
        payload = pdf_file
    else:
        payload = pdf_file.read()

    with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
        tmp_file.write(payload)
    return tmp_file.name, True


def _build_pdf_graph(pdf_path):
    """Compile the extractor -> summarizer graph bound to ``pdf_path``."""
    def document_extractor_with_path(state: DocumentState) -> DocumentState:
        return document_extractor_agent(state, pdf_path)

    talking_documents = StateGraph(DocumentState)
    talking_documents.add_node('document_extractor', document_extractor_with_path)
    talking_documents.add_node('document_summarizer', document_summarizer_agent)
    talking_documents.set_entry_point('document_extractor')
    talking_documents.add_edge('document_extractor', 'document_summarizer')
    return talking_documents.compile()


def process_pdf_and_chat_messages(pdf_file, message, history, system_message, max_tokens, temperature, top_p, enable_search=False):
    """Summarize an uploaded PDF and return only the reply text.

    If ``enable_search`` is truthy and ``message`` contains one of the search
    keywords, the search graph is run as well and the reply combines the PDF
    summary with the first web summary. Otherwise the reply is the first PDF
    summary, or a fixed "unable to process" message when there is none.

    Args:
        pdf_file: The upload. May be ``None``, a filesystem path, raw bytes,
            or an object with ``read()``.
        message: The user's text; also used as the search query.
        history, system_message, max_tokens, temperature, top_p: Accepted for
            signature compatibility; not used by this function.
        enable_search: Allows the keyword-triggered web search.

    Returns:
        The reply string. Any exception raised while processing is reported
        as ``"Error processing PDF: <details>"`` instead of propagating.
    """
    if pdf_file is None:
        return "Please upload a PDF file first."

    tmp_pdf_path = None
    is_temporary = False
    try:
        tmp_pdf_path, is_temporary = _pdf_to_local_path(pdf_file)

        search_keywords = ('search', 'find more', 'additional info', 'more information', 'research')
        # Short-circuit keeps ``message`` untouched when search is disabled.
        wants_search = bool(
            enable_search
            and any(keyword in message.lower() for keyword in search_keywords)
        )

        search_result = None
        if wants_search:
            # Run the web search first, as the original flow did.
            search_result = create_search_graph().invoke({
                'documents': [],
                'summaries': [],
                'search_results': [],
                'search_query': message,
                'needs_search': True,
            })

        pdf_result = _build_pdf_graph(tmp_pdf_path).invoke({'documents': [], 'summaries': []})
        pdf_summaries = pdf_result['summaries']

        if wants_search:
            pdf_part = pdf_summaries[0] if pdf_summaries else 'No summary available'
            web_summaries = search_result['summaries']
            web_part = web_summaries[0] if web_summaries else 'No additional information found'
            return f"**PDF Summary:**\n{pdf_part}\n\n**Additional Information from Web:**\n{web_part}"

        if pdf_summaries:
            return pdf_summaries[0]
        return "Unable to process the PDF. Please check the file format."

    except Exception as e:
        return f"Error processing PDF: {str(e)}"
    finally:
        # Remove our temp copy on every exit path, including failures.
        if is_temporary and tmp_pdf_path is not None:
            try:
                os.unlink(tmp_pdf_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file must not mask the reply.
                pass
|
| 407 |
+
|
| 408 |
def respond(message, history, system_message, max_tokens, temperature, top_p, enable_search=False):
|
| 409 |
"""Enhanced chat function with optional Google search"""
|
| 410 |
if enable_search:
|
|
|
|
| 453 |
top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
|
| 454 |
|
| 455 |
with gr.Column(scale=2):
|
| 456 |
+
chatbot = gr.Chatbot(type='messages')
|
| 457 |
msg = gr.Textbox(label="Message")
|
| 458 |
clear = gr.Button("Clear")
|
| 459 |
|
| 460 |
def user_input(message, history):
    """Append the user's message to the chat history and clear the textbox.

    Returns:
        A tuple ``("", new_history)``. The empty string is the new textbox
        value; ``new_history`` is a new list ending with a
        ``{"role": "user", "content": message}`` entry. The input list is
        not mutated.
    """
    # A missing history is treated as an empty conversation.
    return "", (history or []) + [{"role": "user", "content": message}]
|
| 462 |
|
| 463 |
def bot_response(history, pdf_file, enable_search, system_message, max_tokens, temperature, top_p):
    """Answer the most recent history entry and return the updated history.

    The last entry's ``"content"`` is taken as the user's message. With a PDF
    attached the reply comes from ``process_pdf_and_chat_messages``; without
    one it comes from ``respond_messages``. Both receive the history without
    its last entry.

    Returns:
        A new list: the earlier entries, a rebuilt
        ``{"role": "user", ...}`` entry, and the assistant's reply entry.
        An empty or missing history yields an empty list.
    """
    if not history:
        # Nothing to answer; avoids an IndexError on history[-1].
        return []

    message = history[-1]["content"]
    earlier = history[:-1]

    if pdf_file is not None:
        response = process_pdf_and_chat_messages(pdf_file, message, earlier, system_message, max_tokens, temperature, top_p, enable_search)
    else:
        response = respond_messages(message, earlier, system_message, max_tokens, temperature, top_p, enable_search)

    # The user entry is rebuilt with only role/content, as in the original.
    return earlier + [
        {"role": "user", "content": message},
        {"role": "assistant", "content": response},
    ]
|
| 470 |
|
| 471 |
msg.submit(user_input, [msg, chatbot], [msg, chatbot], queue=False).then(
|
| 472 |
bot_response, [chatbot, pdf_upload, enable_search, system_message, max_tokens, temperature, top_p], chatbot
|
|
|
|
| 474 |
clear.click(lambda: None, None, chatbot, queue=False)
|
| 475 |
|
| 476 |
if __name__ == "__main__":
|
| 477 |
+
demo.launch(share=True)
|