Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -17,8 +17,6 @@ from huggingface_hub import InferenceClient
|
|
| 17 |
import inspect
|
| 18 |
import logging
|
| 19 |
import shutil
|
| 20 |
-
import numpy as np
|
| 21 |
-
import soundfile as sf
|
| 22 |
|
| 23 |
|
| 24 |
# Set up basic configuration for logging
|
|
@@ -30,6 +28,7 @@ llama_cloud_api_key = os.environ.get("LLAMA_CLOUD_API_KEY")
|
|
| 30 |
ACCOUNT_ID = os.environ.get("CLOUDFARE_ACCOUNT_ID")
|
| 31 |
API_TOKEN = os.environ.get("CLOUDFLARE_AUTH_TOKEN")
|
| 32 |
API_BASE_URL = "https://api.cloudflare.com/client/v4/accounts/a17f03e0f049ccae0c15cdcf3b9737ce/ai/run/"
|
|
|
|
| 33 |
|
| 34 |
print(f"ACCOUNT_ID: {ACCOUNT_ID}")
|
| 35 |
print(f"CLOUDFLARE_AUTH_TOKEN: {API_TOKEN[:5]}..." if API_TOKEN else "Not set")
|
|
@@ -399,20 +398,13 @@ def summarize_web_results(query: str, search_results: List[Dict[str, str]], conv
|
|
| 399 |
return f"An error occurred during summarization: {str(e)}"
|
| 400 |
|
| 401 |
# Modify the existing respond function to handle both PDF and web search
|
| 402 |
-
def respond(message, history, model, temperature, num_calls, use_web_search, selected_docs
|
| 403 |
-
if audio_input:
|
| 404 |
-
message = transcribe_audio(audio_input)
|
| 405 |
-
logging.info(f"Transcribed audio: {message}")
|
| 406 |
-
|
| 407 |
-
if not message.strip():
|
| 408 |
-
return "Please provide a text or audio query.", history
|
| 409 |
-
|
| 410 |
logging.info(f"User Query: {message}")
|
| 411 |
logging.info(f"Model Used: {model}")
|
| 412 |
logging.info(f"Selected Documents: {selected_docs}")
|
| 413 |
logging.info(f"Use Web Search: {use_web_search}")
|
| 414 |
|
| 415 |
-
|
| 416 |
|
| 417 |
if use_web_search:
|
| 418 |
original_query = message
|
|
@@ -433,9 +425,10 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
|
|
| 433 |
|
| 434 |
if final_summary:
|
| 435 |
conversation_manager.add_interaction(original_query, final_summary)
|
| 436 |
-
|
| 437 |
else:
|
| 438 |
-
|
|
|
|
| 439 |
else:
|
| 440 |
# Existing PDF search logic
|
| 441 |
try:
|
|
@@ -448,45 +441,41 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
|
|
| 448 |
relevant_docs = [doc for doc in all_relevant_docs if doc.metadata["source"] in selected_docs]
|
| 449 |
|
| 450 |
if not relevant_docs:
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
yield partial_response
|
| 469 |
else:
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
yield partial_response
|
| 473 |
except Exception as e:
|
| 474 |
logging.error(f"Error with {model}: {str(e)}")
|
| 475 |
if "microsoft/Phi-3-mini-4k-instruct" in model:
|
| 476 |
logging.info("Falling back to Mistral model due to Phi-3 error")
|
| 477 |
fallback_model = "mistralai/Mistral-7B-Instruct-v0.3"
|
| 478 |
-
|
| 479 |
else:
|
| 480 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 481 |
|
| 482 |
-
history[-1] = (message, response)
|
| 483 |
-
return response, history
|
| 484 |
-
except Exception as e:
|
| 485 |
-
logging.error(f"Error in respond: {str(e)}")
|
| 486 |
-
error_message = f"An error occurred: {str(e)}"
|
| 487 |
-
history[-1] = (message, error_message)
|
| 488 |
-
return error_message, history
|
| 489 |
-
|
| 490 |
logging.basicConfig(level=logging.DEBUG)
|
| 491 |
|
| 492 |
def get_response_from_cloudflare(prompt, context, query, num_calls=3, temperature=0.2, search_type="pdf"):
|
|
@@ -625,23 +614,15 @@ Write a detailed and complete response that answers the following user question:
|
|
| 625 |
|
| 626 |
logging.info("Finished generating response")
|
| 627 |
|
| 628 |
-
def
|
| 629 |
-
|
| 630 |
-
|
| 631 |
-
# Load the audio file
|
| 632 |
-
audio, sample_rate = sf.read(audio_file)
|
| 633 |
|
| 634 |
-
|
| 635 |
-
|
| 636 |
-
audio = audio.mean(axis=1)
|
| 637 |
|
| 638 |
-
|
| 639 |
-
|
| 640 |
-
audio = audio.astype(np.int16)
|
| 641 |
-
|
| 642 |
-
# Transcribe
|
| 643 |
-
result = client.automatic_speech_recognition(audio, sampling_rate=sample_rate)
|
| 644 |
-
return result["text"]
|
| 645 |
|
| 646 |
def vote(data: gr.LikeData):
|
| 647 |
if data.liked:
|
|
@@ -692,96 +673,40 @@ use_web_search = gr.Checkbox(label="Use Web Search", value=False)
|
|
| 692 |
|
| 693 |
custom_placeholder = "Ask a question (Note: You can toggle between Web Search and PDF Chat in Additional Inputs below)"
|
| 694 |
|
| 695 |
-
|
| 696 |
-
|
| 697 |
-
demo = gr.Interface(
|
| 698 |
-
fn=respond,
|
| 699 |
-
inputs=[
|
| 700 |
-
gr.Textbox(placeholder=custom_placeholder, container=False, scale=7),
|
| 701 |
-
gr.State([]), # for history
|
| 702 |
-
gr.Dropdown(choices=MODELS, label="Select Model", value=MODELS[3]),
|
| 703 |
-
gr.Slider(minimum=0.1, maximum=1.0, value=0.2, step=0.1, label="Temperature"),
|
| 704 |
-
gr.Slider(minimum=1, maximum=5, value=1, step=1, label="Number of API Calls"),
|
| 705 |
-
gr.Checkbox(label="Use Web Search", value=True),
|
| 706 |
-
gr.CheckboxGroup(label="Select documents to query"),
|
| 707 |
-
gr.Audio(sources="microphone", type="filepath")
|
| 708 |
-
],
|
| 709 |
-
outputs=[gr.Chatbot()],
|
| 710 |
-
title="AI-powered PDF Chat and Web Search Assistant",
|
| 711 |
-
description="Chat with your PDFs or use web search to answer questions. You can type or speak your query.",
|
| 712 |
-
theme=gr.themes.Soft(
|
| 713 |
-
primary_hue="orange",
|
| 714 |
-
secondary_hue="amber",
|
| 715 |
-
neutral_hue="gray",
|
| 716 |
-
font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]
|
| 717 |
-
).set(
|
| 718 |
-
body_background_fill_dark="#0c0505",
|
| 719 |
-
block_background_fill_dark="#0c0505",
|
| 720 |
-
block_border_width="1px",
|
| 721 |
-
block_title_background_fill_dark="#1b0f0f",
|
| 722 |
-
input_background_fill_dark="#140b0b",
|
| 723 |
-
button_secondary_background_fill_dark="#140b0b",
|
| 724 |
-
border_color_accent_dark="#1b0f0f",
|
| 725 |
-
border_color_primary_dark="#1b0f0f",
|
| 726 |
-
background_fill_secondary_dark="#0c0505",
|
| 727 |
-
color_accent_soft_dark="transparent",
|
| 728 |
-
code_background_fill_dark="#140b0b"
|
| 729 |
-
),
|
| 730 |
-
css=css,
|
| 731 |
-
examples=[
|
| 732 |
-
["Tell me about the contents of the uploaded PDFs."],
|
| 733 |
-
["What are the main topics discussed in the documents?"],
|
| 734 |
-
["Can you summarize the key points from the PDFs?"],
|
| 735 |
-
["What's the latest news about artificial intelligence?"]
|
| 736 |
-
],
|
| 737 |
-
cache_examples=False,
|
| 738 |
-
analytics_enabled=False,
|
| 739 |
-
)
|
| 740 |
|
| 741 |
-
#
|
| 742 |
-
# Add file upload functionality
|
| 743 |
with gr.Blocks() as demo:
|
| 744 |
-
|
| 745 |
-
|
| 746 |
-
likeable=True,
|
| 747 |
-
layout="bubble",
|
| 748 |
-
height=400,
|
| 749 |
-
value=initial_conversation()
|
| 750 |
-
)
|
| 751 |
-
state = gr.State([])
|
| 752 |
-
|
| 753 |
with gr.Row():
|
| 754 |
-
|
| 755 |
-
|
| 756 |
-
|
| 757 |
-
|
| 758 |
-
)
|
| 759 |
-
|
| 760 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 761 |
with gr.Accordion("⚙️ Parameters", open=False):
|
| 762 |
model = gr.Dropdown(choices=MODELS, label="Select Model", value=MODELS[3])
|
| 763 |
temperature = gr.Slider(minimum=0.1, maximum=1.0, value=0.2, step=0.1, label="Temperature")
|
| 764 |
num_calls = gr.Slider(minimum=1, maximum=5, value=1, step=1, label="Number of API Calls")
|
| 765 |
use_web_search = gr.Checkbox(label="Use Web Search", value=True)
|
| 766 |
-
|
| 767 |
-
|
| 768 |
-
submit_button = gr.Button("Submit")
|
| 769 |
-
|
| 770 |
-
submit_button.click(
|
| 771 |
-
fn=respond,
|
| 772 |
-
inputs=[
|
| 773 |
-
text_input,
|
| 774 |
-
state,
|
| 775 |
-
model,
|
| 776 |
-
temperature,
|
| 777 |
-
num_calls,
|
| 778 |
-
use_web_search,
|
| 779 |
-
selected_docs,
|
| 780 |
-
audio_input
|
| 781 |
-
],
|
| 782 |
-
outputs=[chatbot, state]
|
| 783 |
-
)
|
| 784 |
-
|
| 785 |
# Add file upload functionality
|
| 786 |
gr.Markdown("## Upload and Manage PDF Documents")
|
| 787 |
with gr.Row():
|
|
@@ -793,35 +718,50 @@ with gr.Blocks() as demo:
|
|
| 793 |
update_output = gr.Textbox(label="Update Status")
|
| 794 |
delete_button = gr.Button("Delete Selected Documents")
|
| 795 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 796 |
update_button.click(
|
| 797 |
update_vectors,
|
| 798 |
inputs=[file_input, parser_dropdown],
|
| 799 |
-
outputs=[update_output,
|
| 800 |
)
|
| 801 |
|
| 802 |
refresh_button.click(
|
| 803 |
refresh_documents,
|
| 804 |
inputs=[],
|
| 805 |
-
outputs=[
|
| 806 |
)
|
| 807 |
|
| 808 |
delete_button.click(
|
| 809 |
delete_documents,
|
| 810 |
-
inputs=[
|
| 811 |
-
outputs=[update_output,
|
| 812 |
)
|
| 813 |
|
| 814 |
gr.Markdown(
|
| 815 |
"""
|
| 816 |
## How to use
|
| 817 |
-
1.
|
| 818 |
-
2.
|
| 819 |
-
3.
|
| 820 |
-
4.
|
| 821 |
-
5.
|
| 822 |
-
6.
|
| 823 |
-
7.
|
| 824 |
"""
|
| 825 |
)
|
|
|
|
| 826 |
if __name__ == "__main__":
|
| 827 |
-
demo.launch(share=True)
|
|
|
|
|
|
| 17 |
import inspect
|
| 18 |
import logging
|
| 19 |
import shutil
|
|
|
|
|
|
|
| 20 |
|
| 21 |
|
| 22 |
# Set up basic configuration for logging
|
|
|
|
| 28 |
ACCOUNT_ID = os.environ.get("CLOUDFARE_ACCOUNT_ID")
|
| 29 |
API_TOKEN = os.environ.get("CLOUDFLARE_AUTH_TOKEN")
|
| 30 |
API_BASE_URL = "https://api.cloudflare.com/client/v4/accounts/a17f03e0f049ccae0c15cdcf3b9737ce/ai/run/"
|
| 31 |
+
whisper_api = InferenceClient("openai/whisper-small", token=huggingface_token)
|
| 32 |
|
| 33 |
print(f"ACCOUNT_ID: {ACCOUNT_ID}")
|
| 34 |
print(f"CLOUDFLARE_AUTH_TOKEN: {API_TOKEN[:5]}..." if API_TOKEN else "Not set")
|
|
|
|
| 398 |
return f"An error occurred during summarization: {str(e)}"
|
| 399 |
|
| 400 |
# Modify the existing respond function to handle both PDF and web search
|
| 401 |
+
def respond(message, history, model, temperature, num_calls, use_web_search, selected_docs):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 402 |
logging.info(f"User Query: {message}")
|
| 403 |
logging.info(f"Model Used: {model}")
|
| 404 |
logging.info(f"Selected Documents: {selected_docs}")
|
| 405 |
logging.info(f"Use Web Search: {use_web_search}")
|
| 406 |
|
| 407 |
+
response = ""
|
| 408 |
|
| 409 |
if use_web_search:
|
| 410 |
original_query = message
|
|
|
|
| 425 |
|
| 426 |
if final_summary:
|
| 427 |
conversation_manager.add_interaction(original_query, final_summary)
|
| 428 |
+
response = final_summary
|
| 429 |
else:
|
| 430 |
+
response = "Unable to generate a response. Please try a different query."
|
| 431 |
+
|
| 432 |
else:
|
| 433 |
# Existing PDF search logic
|
| 434 |
try:
|
|
|
|
| 441 |
relevant_docs = [doc for doc in all_relevant_docs if doc.metadata["source"] in selected_docs]
|
| 442 |
|
| 443 |
if not relevant_docs:
|
| 444 |
+
response = "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
|
| 445 |
+
else:
|
| 446 |
+
context_str = "\n".join([doc.page_content for doc in relevant_docs])
|
| 447 |
+
logging.info(f"Context length: {len(context_str)}")
|
| 448 |
+
|
| 449 |
+
if model.startswith("duckduckgo/"):
|
| 450 |
+
# Use DuckDuckGo chat with context
|
| 451 |
+
for partial_response in get_response_from_duckduckgo(message, model, context_str, num_calls, temperature):
|
| 452 |
+
response += partial_response
|
| 453 |
+
elif model == "@cf/meta/llama-3.1-8b-instruct":
|
| 454 |
+
# Use Cloudflare API
|
| 455 |
+
for partial_response in get_response_from_cloudflare(prompt="", context=context_str, query=message, num_calls=num_calls, temperature=temperature, search_type="pdf"):
|
| 456 |
+
response += partial_response
|
| 457 |
+
else:
|
| 458 |
+
# Use Hugging Face API
|
| 459 |
+
for partial_response in get_response_from_pdf(message, model, selected_docs, num_calls=num_calls, temperature=temperature):
|
| 460 |
+
response += partial_response
|
|
|
|
| 461 |
else:
|
| 462 |
+
response = "No documents available. Please upload PDF documents to answer questions."
|
| 463 |
+
|
|
|
|
| 464 |
except Exception as e:
|
| 465 |
logging.error(f"Error with {model}: {str(e)}")
|
| 466 |
if "microsoft/Phi-3-mini-4k-instruct" in model:
|
| 467 |
logging.info("Falling back to Mistral model due to Phi-3 error")
|
| 468 |
fallback_model = "mistralai/Mistral-7B-Instruct-v0.3"
|
| 469 |
+
return respond(message, history, fallback_model, temperature, num_calls, use_web_search, selected_docs)
|
| 470 |
else:
|
| 471 |
+
response = f"An error occurred with the {model} model: {str(e)}. Please try again or select a different model."
|
| 472 |
+
|
| 473 |
+
# Update the conversation history
|
| 474 |
+
history.append((message, response))
|
| 475 |
+
|
| 476 |
+
# Yield the updated history
|
| 477 |
+
yield history
|
| 478 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 479 |
logging.basicConfig(level=logging.DEBUG)
|
| 480 |
|
| 481 |
def get_response_from_cloudflare(prompt, context, query, num_calls=3, temperature=0.2, search_type="pdf"):
|
|
|
|
| 614 |
|
| 615 |
logging.info("Finished generating response")
|
| 616 |
|
| 617 |
+
def transcribe(audio_file):
    """Transcribe a recorded audio file to text with the Whisper inference client.

    Args:
        audio_file: Filesystem path to the recorded audio (as produced by a
            ``gr.Audio(type="filepath")`` component), or ``None``/empty when
            nothing was recorded.

    Returns:
        The transcribed text, or an empty string when no audio was supplied.
    """
    # Nothing recorded (None) or an empty path: there is nothing to transcribe.
    if not audio_file:
        return ""

    with open(audio_file, "rb") as f:
        audio_data = f.read()

    # InferenceClient instances are not callable; speech-to-text is exposed
    # through the dedicated automatic_speech_recognition task method.
    result = whisper_api.automatic_speech_recognition(audio_data)

    # Older huggingface_hub releases return the transcription as a plain
    # string; newer ones return an output object exposing a "text" field.
    if isinstance(result, str):
        return result
    return result["text"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 626 |
|
| 627 |
def vote(data: gr.LikeData):
|
| 628 |
if data.liked:
|
|
|
|
| 673 |
|
| 674 |
custom_placeholder = "Ask a question (Note: You can toggle between Web Search and PDF Chat in Additional Inputs below)"
|
| 675 |
|
| 676 |
+
def update_textbox(transcription):
    """Build a Gradio update that places ``transcription`` into a textbox.

    Args:
        transcription: Text to show in the target textbox.

    Returns:
        A Gradio update payload that sets the component's ``value``.
    """
    # gr.Textbox.update() was removed in Gradio 4; the generic gr.update()
    # helper is available in both Gradio 3.x and 4.x.
    return gr.update(value=transcription)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 678 |
|
| 679 |
+
# Update the Gradio interface
|
|
|
|
| 680 |
with gr.Blocks() as demo:
|
| 681 |
+
gr.Markdown("# AI-powered PDF Chat and Web Search Assistant with Speech Input")
|
| 682 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 683 |
with gr.Row():
|
| 684 |
+
with gr.Column(scale=1):
|
| 685 |
+
audio_input = gr.Audio(sources="microphone", type="filepath", label="Speak your query")
|
| 686 |
+
transcribe_button = gr.Button("Transcribe")
|
| 687 |
+
|
| 688 |
+
with gr.Column(scale=2):
|
| 689 |
+
chatbot = gr.Chatbot(
|
| 690 |
+
show_copy_button=True,
|
| 691 |
+
likeable=True,
|
| 692 |
+
layout="bubble",
|
| 693 |
+
height=400,
|
| 694 |
+
value=initial_conversation()
|
| 695 |
+
)
|
| 696 |
+
query_textbox = gr.Textbox(
|
| 697 |
+
placeholder="Ask a question about the uploaded PDFs or any topic",
|
| 698 |
+
container=False,
|
| 699 |
+
scale=7
|
| 700 |
+
)
|
| 701 |
+
submit_button = gr.Button("Submit")
|
| 702 |
+
|
| 703 |
with gr.Accordion("⚙️ Parameters", open=False):
|
| 704 |
model = gr.Dropdown(choices=MODELS, label="Select Model", value=MODELS[3])
|
| 705 |
temperature = gr.Slider(minimum=0.1, maximum=1.0, value=0.2, step=0.1, label="Temperature")
|
| 706 |
num_calls = gr.Slider(minimum=1, maximum=5, value=1, step=1, label="Number of API Calls")
|
| 707 |
use_web_search = gr.Checkbox(label="Use Web Search", value=True)
|
| 708 |
+
document_selector = gr.CheckboxGroup(label="Select documents to query")
|
| 709 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 710 |
# Add file upload functionality
|
| 711 |
gr.Markdown("## Upload and Manage PDF Documents")
|
| 712 |
with gr.Row():
|
|
|
|
| 718 |
update_output = gr.Textbox(label="Update Status")
|
| 719 |
delete_button = gr.Button("Delete Selected Documents")
|
| 720 |
|
| 721 |
+
# Connect components
|
| 722 |
+
transcribe_button.click(
|
| 723 |
+
transcribe,
|
| 724 |
+
inputs=[audio_input],
|
| 725 |
+
outputs=[query_textbox]
|
| 726 |
+
)
|
| 727 |
+
|
| 728 |
+
submit_button.click(
|
| 729 |
+
respond,
|
| 730 |
+
inputs=[query_textbox, chatbot, model, temperature, num_calls, use_web_search, document_selector],
|
| 731 |
+
outputs=[chatbot]
|
| 732 |
+
)
|
| 733 |
+
|
| 734 |
update_button.click(
|
| 735 |
update_vectors,
|
| 736 |
inputs=[file_input, parser_dropdown],
|
| 737 |
+
outputs=[update_output, document_selector]
|
| 738 |
)
|
| 739 |
|
| 740 |
refresh_button.click(
|
| 741 |
refresh_documents,
|
| 742 |
inputs=[],
|
| 743 |
+
outputs=[document_selector]
|
| 744 |
)
|
| 745 |
|
| 746 |
delete_button.click(
|
| 747 |
delete_documents,
|
| 748 |
+
inputs=[document_selector],
|
| 749 |
+
outputs=[update_output, document_selector]
|
| 750 |
)
|
| 751 |
|
| 752 |
gr.Markdown(
|
| 753 |
"""
|
| 754 |
## How to use
|
| 755 |
+
1. Use the microphone to speak your query, then click "Transcribe", or type directly in the text box.
|
| 756 |
+
2. Click "Submit" to get a response from the AI.
|
| 757 |
+
3. Upload PDF documents using the file input at the bottom.
|
| 758 |
+
4. Select the PDF parser (pypdf or llamaparse) and click "Upload Document" to update the vector store.
|
| 759 |
+
5. Select the documents you want to query using the checkboxes.
|
| 760 |
+
6. Toggle "Use Web Search" to switch between PDF chat and web search.
|
| 761 |
+
7. Adjust Temperature and Number of API Calls to fine-tune the response generation.
|
| 762 |
"""
|
| 763 |
)
|
| 764 |
+
|
| 765 |
if __name__ == "__main__":
|
| 766 |
+
demo.launch(share=True)
|
| 767 |
+
Troubleshooting Python Audio Recording Issues - Claude
|