| import gradio as gr |
| import cohere |
| import os |
| import uuid |
| import secrets |
| import nltk |
| from unstructured.documents.html import HTMLDocument |
| import requests |
| from bs4 import BeautifulSoup |
|
|
| |
# Fetch the NLTK tokenizer/POS-tagger models that unstructured's HTML
# processing depends on (downloaded once, then served from the local cache).
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')


# Cohere API client. The key is read from the environment; `client_name`
# tags requests for usage attribution on the Cohere side.
co = cohere.Client(os.getenv("COHERE_API_KEY"), client_name="huggingface-aya-23")
|
|
| |
def process_html_from_url(url):
    """Fetch *url* and return the page's visible text.

    Args:
        url: Address of the HTML page to retrieve.

    Returns:
        The text content extracted with BeautifulSoup, or ``None`` if the
        HTTP request fails for any reason (connection error, timeout,
        or a 4xx/5xx status).
    """
    try:
        # BUG FIX: the original call had no timeout, so a stalled server
        # could hang the Gradio handler indefinitely.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # turn 4xx/5xx into RequestException
        soup = BeautifulSoup(response.text, 'html.parser')
        return soup.get_text()
    except requests.RequestException as e:
        # Best-effort: log the problem and signal failure with None.
        print(f"Error retrieving HTML content: {e}")
        return None
|
|
| |
def generate_response(user_message, extracted_text, cid, token, history=None):
    """Stream one chat turn from Cohere Aya 23 over the extracted page text.

    Args:
        user_message: The user's question.
        extracted_text: Text previously scraped from the URL; it is
            prepended to the question so the model has the page context.
        cid: Cohere conversation id; a fresh UUID is generated when empty.
        token: Per-session token set on page load; absence means the page
            did not initialize correctly.
        history: Flat list alternating [user, bot, user, bot, ...] turns;
            mutated in place. A new list is created when None.

    Yields:
        Tuples of (chat_pairs, history, cid) after every streamed text
        chunk, where chat_pairs is the [(user, bot), ...] list the Gradio
        Chatbot expects.

    Raises:
        gr.Error: if *token* is missing.
    """
    if not token:
        raise gr.Error("Error loading.")

    if history is None:
        history = []
    if not cid:
        cid = str(uuid.uuid4())

    combined_message = f"{extracted_text}\n\n{user_message}"
    history.append(combined_message)

    stream = co.chat_stream(
        message=combined_message,
        conversation_id=cid,
        model='c4ai-aya-23',
        connectors=[],
        temperature=0.3,
    )
    output = ""
    bot_turn_open = False  # has the assistant slot been appended to history?

    for response in stream:
        if response.event_type == "text-generation":
            output += response.text
            # BUG FIX: the original tested the *stream* index (idx == 0),
            # but Cohere streams open with a "stream-start" event, so the
            # first "text-generation" chunk arrives at idx >= 1; the bot
            # turn was never appended and `history[-1] = output` clobbered
            # the user message. Track the bot turn explicitly instead.
            if not bot_turn_open:
                history.append(" " + output)
                bot_turn_open = True
            else:
                history[-1] = output
            # Re-pair the flat history into (user, bot) tuples for the UI.
            chat = [
                (history[i].strip(), history[i + 1].strip())
                for i in range(0, len(history) - 1, 2)
            ]
            yield chat, history, cid
|
|
| |
def clear_chat():
    """Reset the session: empty chat pane, empty flat history, a brand-new
    conversation id, and cleared extracted text."""
    fresh_cid = str(uuid.uuid4())
    return [], [], fresh_cid, ""
|
|
| |
def handle_unstructure_and_submit(url, user_message, cid, token, history):
    """Scrape *url*, then run one chat turn over the extracted text.

    Returns:
        (chat_pairs, history, cid, page_content) matching the Gradio
        outputs [chatbot, history, cid, extracted_text]. On fetch failure
        the existing conversation state is preserved and the failure is
        shown as a chat message.
    """
    page_content = process_html_from_url(url)
    if page_content:
        # BUG FIX: `chat` was unbound if the stream yielded nothing, and
        # the generator's (history, cid) updates were discarded — a cid
        # minted inside generate_response never reached the UI state.
        chat = []
        for chat, history, cid in generate_response(user_message, page_content, cid, token, history):
            pass  # drain the stream; only the final state is returned
        return chat, history, cid, page_content
    # BUG FIX: the original returned bare strings here, which are the
    # wrong types for the Chatbot (list of pairs) and State outputs.
    return [(user_message, "Failed to retrieve HTML content")], history or [], cid, ""
|
|
| |
def continue_conversation(user_message, extracted_text, cid, token, history):
    """Run a follow-up chat turn against previously extracted page text.

    Returns:
        (chat_pairs, history, cid) matching the Gradio outputs
        [chatbot, history, cid]. When no text has been extracted yet, the
        existing state is preserved and a notice is shown in the chat.
    """
    if extracted_text:
        # BUG FIX: `chat` was unbound if the stream yielded nothing, and
        # the generator's (history, cid) updates were thrown away.
        chat = []
        for chat, history, cid in generate_response(user_message, extracted_text, cid, token, history):
            pass  # drain the stream; only the final state is returned
        return chat, history, cid
    # BUG FIX: the original returned bare strings, the wrong types for the
    # Chatbot (list of pairs) and State outputs; keep the session state.
    return [(user_message, "No text extracted to continue the conversation.")], history or [], cid
|
|
| |
| custom_css = """ |
| #logo-img { |
| display: block; |
| margin-left: auto; |
| margin-right: auto; |
| width: 50%; |
| } |
| #chatbot { |
| font-size: 16px; |
| min-height: 400px; |
| } |
| #user-message { |
| font-size: 16px; |
| } |
| .center-text { |
| text-align: center; |
| font-family: Arial, sans-serif; |
| } |
| .center-text h1 { |
| font-size: 2em; |
| font-weight: bold; |
| } |
| .center-text p { |
| font-size: 1.2em; |
| font-weight: bold; |
| } |
| """ |
|
|
| |
with gr.Blocks(analytics_enabled=False, css=custom_css) as demo:
    # Per-session state: conversation id, page-load token, flat chat
    # history, and the text scraped from the last URL.
    cid = gr.State("")
    token = gr.State(value=None)
    history = gr.State([])
    extracted_text = gr.State("")

    with gr.Row():
        gr.Markdown("""
<div class="center-text">
<h1>Cohere Chatbot</h1>
<p><strong>Note</strong>: Aya 23 using Unstructured to extract text from web and process it.</p>
<p><strong>Cohere Aya 23</strong>: <a href="https://cohere.com/research" target="_blank">Cohere for AI</a> and <a href="https://cohere.com/" target="_blank">Cohere</a></p>
<p><strong>Unstructured</strong>: Open-Source Pre-Processing Tools for Unstructured Data</p>
</div>
""")

    with gr.Row():
        url_input = gr.Textbox(placeholder="Enter URL ...", label="URL", show_label=False, elem_id="url-input")

    with gr.Row():
        chatbot = gr.Chatbot(elem_id="chatbot", show_label=False)

    with gr.Row():
        user_message = gr.Textbox(placeholder="Ask anything ...", label="Input", show_label=False, elem_id="user-message")

    with gr.Row():
        submit_button = gr.Button("Unstructure Text and Submit to Cohere")
        continue_button = gr.Button("Continue Conversation")
        clear_button = gr.Button("Clear chat")

    # Main wiring: scrape+chat, follow-up chat, and full reset.
    submit_button.click(fn=handle_unstructure_and_submit, inputs=[url_input, user_message, cid, token, history], outputs=[chatbot, history, cid, extracted_text], concurrency_limit=32)
    continue_button.click(fn=continue_conversation, inputs=[user_message, extracted_text, cid, token, history], outputs=[chatbot, history, cid], concurrency_limit=32)
    clear_button.click(fn=clear_chat, inputs=None, outputs=[chatbot, history, cid, extracted_text], concurrency_limit=32)

    # BUG FIX: these clear-the-textbox callbacks were `lambda x: ...` but
    # wired with `inputs=None`; Gradio then invokes the function with zero
    # arguments and raises TypeError. They must be zero-argument lambdas.
    user_message.submit(lambda: gr.update(value=""), None, [user_message], queue=False)
    submit_button.click(lambda: gr.update(value=""), None, [user_message], queue=False)
    continue_button.click(lambda: gr.update(value=""), None, [user_message], queue=False)
    clear_button.click(lambda: gr.update(value=""), None, [user_message], queue=False)

    # Issue a per-session token on page load; generate_response refuses to
    # run without it.
    demo.load(lambda: secrets.token_hex(16), None, token)
|
|
| if __name__ == "__main__": |
| try: |
| demo.queue(api_open=False, max_size=40).launch(show_api=False) |
| except Exception as e: |
| print(f"Error: {e}") |
|
|