import gradio as gr
import cohere
import os
import uuid
import secrets
import nltk
from unstructured.documents.html import HTMLDocument
import requests
from bs4 import BeautifulSoup
# Fetch NLTK tokenizer/tagger models needed by unstructured's text pipeline.
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
# Cohere client; API key comes from the COHERE_API_KEY environment variable.
co = cohere.Client(os.getenv("COHERE_API_KEY"), client_name="huggingface-aya-23")
# Function to process HTML content from a given URL
def process_html_from_url(url, timeout=10):
    """Fetch *url* and return its visible text, or None on any request error.

    Args:
        url: Page to download.
        timeout: Seconds before the HTTP request is abandoned. The original
            call passed no timeout, so a stalled server could hang the app
            indefinitely; 10s is a conservative, backward-compatible default.

    Returns:
        The page's text content (tags stripped), or None if the request
        failed or the server answered with a 4xx/5xx status.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # turn 4xx/5xx into RequestException
        soup = BeautifulSoup(response.text, 'html.parser')
        return soup.get_text()
    except requests.RequestException as e:
        # Best-effort: log and signal failure to the caller via None.
        print(f"Error retrieving HTML content: {e}")
        return None
# Function to generate response using Cohere chatbot
def generate_response(user_message, extracted_text, cid, token, history=None):
    """Stream a Cohere chat reply grounded in *extracted_text*.

    Appends the combined prompt and then the growing bot reply to *history*
    (which alternates user/bot entries), and yields
    ``(chat_pairs, history, cid)`` after each streamed text chunk so the
    Gradio chatbot updates incrementally.

    Raises:
        gr.Error: if *token* (the per-session marker set on demo.load) is
            missing, i.e. the page did not load correctly.
    """
    if not token:
        raise gr.Error("Error loading.")
    if history is None:
        history = []
    if not cid:
        # Start a fresh Cohere conversation for this session.
        cid = str(uuid.uuid4())
    combined_message = f"{extracted_text}\n\n{user_message}"
    history.append(combined_message)
    stream = co.chat_stream(message=combined_message, conversation_id=cid, model='c4ai-aya-23', connectors=[], temperature=0.3)
    output = ""
    reply_appended = False  # has the bot's slot been added to history yet?
    for response in stream:
        if response.event_type == "text-generation":
            output += response.text
            if not reply_appended:
                # First text chunk: create the bot slot. The original keyed
                # this on ``enumerate`` idx == 0, but Cohere's first stream
                # event is "stream-start", not "text-generation", so the bot
                # slot was never appended and ``history[-1] = output``
                # overwrote the user's message instead.
                history.append(" " + output)
                reply_appended = True
            else:
                history[-1] = output
            # Pair (user, bot) entries for the gr.Chatbot component.
            chat = [
                (history[i].strip(), history[i + 1].strip())
                for i in range(0, len(history) - 1, 2)
            ]
            yield chat, history, cid
# Function to clear chat
def clear_chat():
    """Reset the session: empty chat display, empty history, a brand-new
    conversation id, and cleared extracted text."""
    fresh_conversation_id = str(uuid.uuid4())
    return [], [], fresh_conversation_id, ""
# Function to handle URL input, unstructure the text, and submit to Cohere
def handle_unstructure_and_submit(url, user_message, cid, token, history):
    """Extract text from *url* and send it together with *user_message* to Cohere.

    Returns ``(chat_pairs, history, conversation_id, extracted_text)``; on a
    fetch failure it returns placeholder strings in the same four slots (the
    original shape, preserved so the Gradio outputs still bind).
    """
    page_content = process_html_from_url(url)
    if not page_content:
        return "Failed to retrieve HTML content", "", "", ""
    # Drain the streaming generator, keeping the FINAL chat/history/cid.
    # Two fixes over the original: ``chat`` is pre-initialized so an empty
    # stream no longer raises NameError, and the updated history/cid yielded
    # by generate_response are captured instead of discarded (the original
    # returned the stale incoming ``cid``, losing a freshly minted
    # conversation id and breaking conversation continuity).
    chat = []
    for chat, history, cid in generate_response(user_message, page_content, cid, token, history):
        pass
    return chat, history, cid, page_content
# Function to continue the conversation using the last extracted text
def continue_conversation(user_message, extracted_text, cid, token, history):
    """Send a follow-up *user_message* using the previously extracted text.

    Returns ``(chat_pairs, history, conversation_id)``; if no text has been
    extracted yet, returns an explanatory message in the same three slots
    (the original shape, preserved so the Gradio outputs still bind).
    """
    if not extracted_text:
        return "No text extracted to continue the conversation.", "", ""
    # Drain the stream, keeping the FINAL state. As in
    # handle_unstructure_and_submit: pre-initialize ``chat`` (the original
    # raised NameError on an empty stream) and capture the updated
    # history/cid instead of discarding them.
    chat = []
    for chat, history, cid in generate_response(user_message, extracted_text, cid, token, history):
        pass
    return chat, history, cid
# Custom CSS for the Gradio app: centers the logo and headline text and
# sizes the chatbot / input widgets (matched by elem_id in the layout below).
custom_css = """
#logo-img {
display: block;
margin-left: auto;
margin-right: auto;
width: 50%;
}
#chatbot {
font-size: 16px;
min-height: 400px;
}
#user-message {
font-size: 16px;
}
.center-text {
text-align: center;
font-family: Arial, sans-serif;
}
.center-text h1 {
font-size: 2em;
font-weight: bold;
}
.center-text p {
font-size: 1.2em;
font-weight: bold;
}
"""
# Create Gradio interface
with gr.Blocks(analytics_enabled=False, css=custom_css) as demo:
    # Per-session state: Cohere conversation id, page-load token, the
    # alternating user/bot message history, and the last extracted page text.
    cid = gr.State("")
    token = gr.State(value=None)
    history = gr.State([])
    extracted_text = gr.State("")
    with gr.Row():
        gr.Markdown("""
<div class="center-text">
<h1>Cohere Chatbot</h1>
<p><strong>Note</strong>: Aya 23 using Unstructured to extract text from web and process it.</p>
<p><strong>Cohere Aya 23</strong>: <a href="https://cohere.com/research" target="_blank">Cohere for AI</a> and <a href="https://cohere.com/" target="_blank">Cohere</a></p>
<p><strong>Unstructured</strong>: Open-Source Pre-Processing Tools for Unstructured Data</p>
</div>
""")
    with gr.Row():
        url_input = gr.Textbox(placeholder="Enter URL ...", label="URL", show_label=False, elem_id="url-input")
    with gr.Row():
        chatbot = gr.Chatbot(elem_id="chatbot", show_label=False)
    with gr.Row():
        user_message = gr.Textbox(placeholder="Ask anything ...", label="Input", show_label=False, elem_id="user-message")
    with gr.Row():
        submit_button = gr.Button("Unstructure Text and Submit to Cohere")
        continue_button = gr.Button("Continue Conversation")
        clear_button = gr.Button("Clear chat")
    # Main actions; concurrency_limit caps simultaneous streaming requests.
    submit_button.click(fn=handle_unstructure_and_submit, inputs=[url_input, user_message, cid, token, history], outputs=[chatbot, history, cid, extracted_text], concurrency_limit=32)
    continue_button.click(fn=continue_conversation, inputs=[user_message, extracted_text, cid, token, history], outputs=[chatbot, history, cid], concurrency_limit=32)
    clear_button.click(fn=clear_chat, inputs=None, outputs=[chatbot, history, cid, extracted_text], concurrency_limit=32)
    # Clear the input textbox after each action. With inputs=None Gradio
    # calls the handler with ZERO arguments, so the original
    # ``lambda x: ...`` raised TypeError on every trigger; the lambdas must
    # take no parameters.
    user_message.submit(lambda: gr.update(value=""), None, [user_message], queue=False)
    submit_button.click(lambda: gr.update(value=""), None, [user_message], queue=False)
    continue_button.click(lambda: gr.update(value=""), None, [user_message], queue=False)
    clear_button.click(lambda: gr.update(value=""), None, [user_message], queue=False)
    # Mint a fresh session token on page load; generate_response requires it.
    demo.load(lambda: secrets.token_hex(16), None, token)
if __name__ == "__main__":
    # Queue with a bounded backlog and the public API disabled, then serve.
    try:
        app = demo.queue(api_open=False, max_size=40)
        app.launch(show_api=False)
    except Exception as exc:
        # Top-level boundary: report the failure instead of a bare traceback.
        print(f"Error: {exc}")