import gradio as gr
import cohere
import os
import uuid
import secrets
import nltk
from unstructured.documents.html import HTMLDocument  # noqa: F401 -- kept for parity; presumably used indirectly, verify
import requests
from bs4 import BeautifulSoup

# Download NLTK data required by the text-processing pipeline.
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

# Initialize Cohere client with the API key from the environment.
co = cohere.Client(os.getenv("COHERE_API_KEY"), client_name="huggingface-aya-23")


def process_html_from_url(url):
    """Fetch *url* and return its visible text, or None on any request error."""
    try:
        # Timeout prevents an unresponsive host from hanging the UI handler.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Raise an HTTPError for 4xx/5xx responses
        soup = BeautifulSoup(response.text, 'html.parser')
        return soup.get_text()
    except requests.RequestException as e:
        print(f"Error retrieving HTML content: {e}")
        return None


def generate_response(user_message, extracted_text, cid, token, history=None):
    """Stream a Cohere chat reply for *user_message* grounded in *extracted_text*.

    Generator: yields (chat_pairs, history, cid) as tokens arrive, where
    chat_pairs is the [(user, bot), ...] list the gr.Chatbot expects.
    Raises gr.Error if the session token is missing.
    """
    if not token:
        raise gr.Error("Error loading.")
    if history is None:
        history = []
    if not cid:
        # Fresh conversation id so Cohere keeps per-session context.
        cid = str(uuid.uuid4())

    combined_message = f"{extracted_text}\n\n{user_message}"
    history.append(combined_message)

    stream = co.chat_stream(
        message=combined_message,
        conversation_id=cid,
        model='c4ai-aya-23',
        connectors=[],
        temperature=0.3,
    )

    output = ""
    for idx, response in enumerate(stream):
        if response.event_type == "text-generation":
            output += response.text
        if idx == 0:
            # Placeholder bot turn; overwritten in place as tokens stream in.
            history.append(" " + output)
        else:
            history[-1] = output
        # history alternates user/bot messages; pair them up for the Chatbot.
        chat = [
            (history[i].strip(), history[i + 1].strip())
            for i in range(0, len(history) - 1, 2)
        ]
        yield chat, history, cid


def clear_chat():
    """Reset chatbot display, history, conversation id, and extracted text."""
    return [], [], str(uuid.uuid4()), ""


def handle_unstructure_and_submit(url, user_message, cid, token, history):
    """Extract text from *url*, send it with *user_message* to Cohere.

    Returns (chat_pairs, history, cid, extracted_text). On fetch failure,
    returns well-typed empty values so the UI state stays consistent.
    """
    page_content = process_html_from_url(url)
    if page_content:
        chat = []  # Guard: stream could yield nothing, leaving `chat` unbound.
        for chat, _, _ in generate_response(user_message, page_content, cid, token, history):
            pass  # Drain the stream; only the final state is returned.
        return chat, history, cid, page_content
    # Failure: keep existing history/cid, clear extracted text.
    print("Failed to retrieve HTML content")
    return [], history, cid, ""


def continue_conversation(user_message, extracted_text, cid, token, history):
    """Continue the chat using the previously extracted page text.

    Returns (chat_pairs, history, cid); returns typed empties when no text
    has been extracted yet.
    """
    if extracted_text:
        chat = []  # Guard against an empty stream leaving `chat` unbound.
        for chat, _, _ in generate_response(user_message, extracted_text, cid, token, history):
            pass
        return chat, history, cid
    print("No text extracted to continue the conversation.")
    return [], history, cid


# Custom CSS for the Gradio app.
custom_css = """
#logo-img { display: block; margin-left: auto; margin-right: auto; width: 50%; }
#chatbot { font-size: 16px; min-height: 400px; }
#user-message { font-size: 16px; }
.center-text { text-align: center; font-family: Arial, sans-serif; }
.center-text h1 { font-size: 2em; font-weight: bold; }
.center-text p { font-size: 1.2em; font-weight: bold; }
"""

# Build the Gradio interface.
with gr.Blocks(analytics_enabled=False, css=custom_css) as demo:
    cid = gr.State("")
    token = gr.State(value=None)
    history = gr.State([])
    extracted_text = gr.State("")

    with gr.Row():
        gr.Markdown("""

Cohere Chatbot

Note: Aya 23 using Unstructured to extract text from web and process it.

Cohere Aya 23: Cohere for AI and Cohere

Unstructured: Open-Source Pre-Processing Tools for Unstructured Data

""")
    with gr.Row():
        url_input = gr.Textbox(placeholder="Enter URL ...", label="URL", show_label=False, elem_id="url-input")
    with gr.Row():
        chatbot = gr.Chatbot(elem_id="chatbot", show_label=False)
    with gr.Row():
        user_message = gr.Textbox(placeholder="Ask anything ...", label="Input", show_label=False, elem_id="user-message")
    with gr.Row():
        submit_button = gr.Button("Unstructure Text and Submit to Cohere")
        continue_button = gr.Button("Continue Conversation")
        clear_button = gr.Button("Clear chat")

    submit_button.click(
        fn=handle_unstructure_and_submit,
        inputs=[url_input, user_message, cid, token, history],
        outputs=[chatbot, history, cid, extracted_text],
        concurrency_limit=32,
    )
    continue_button.click(
        fn=continue_conversation,
        inputs=[user_message, extracted_text, cid, token, history],
        outputs=[chatbot, history, cid],
        concurrency_limit=32,
    )
    clear_button.click(
        fn=clear_chat,
        inputs=None,
        outputs=[chatbot, history, cid, extracted_text],
        concurrency_limit=32,
    )

    # With inputs=None Gradio invokes the fn with no arguments, so the
    # clear-textbox callbacks must take zero parameters.
    user_message.submit(lambda: gr.update(value=""), None, [user_message], queue=False)
    submit_button.click(lambda: gr.update(value=""), None, [user_message], queue=False)
    continue_button.click(lambda: gr.update(value=""), None, [user_message], queue=False)
    clear_button.click(lambda: gr.update(value=""), None, [user_message], queue=False)

    # Per-session random token, generated on page load.
    demo.load(lambda: secrets.token_hex(16), None, token)

if __name__ == "__main__":
    try:
        demo.queue(api_open=False, max_size=40).launch(show_api=False)
    except Exception as e:
        print(f"Error: {e}")