import gradio as gr
import cohere
import os
import uuid
import secrets
import nltk
from unstructured.documents.html import HTMLDocument  # noqa: F401 -- kept for parity; presumably used indirectly, verify
import requests
from bs4 import BeautifulSoup

# Download NLTK data required by the text-processing pipeline.
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

# Initialize Cohere client with the API key from the environment.
co = cohere.Client(os.getenv("COHERE_API_KEY"), client_name="huggingface-aya-23")


def process_html_from_url(url):
    """Fetch *url* and return its visible text, or None on any request error."""
    try:
        # Timeout prevents an unresponsive host from hanging the UI handler.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Raise an HTTPError for 4xx/5xx responses
        soup = BeautifulSoup(response.text, 'html.parser')
        return soup.get_text()
    except requests.RequestException as e:
        print(f"Error retrieving HTML content: {e}")
        return None


def generate_response(user_message, extracted_text, cid, token, history=None):
    """Stream a Cohere chat reply for *user_message* grounded in *extracted_text*.

    Generator: yields (chat_pairs, history, cid) as tokens arrive, where
    chat_pairs is the [(user, bot), ...] list the gr.Chatbot expects.
    Raises gr.Error if the session token is missing.
    """
    if not token:
        raise gr.Error("Error loading.")
    if history is None:
        history = []
    if not cid:
        # Fresh conversation id so Cohere keeps per-session context.
        cid = str(uuid.uuid4())

    combined_message = f"{extracted_text}\n\n{user_message}"
    history.append(combined_message)

    stream = co.chat_stream(
        message=combined_message,
        conversation_id=cid,
        model='c4ai-aya-23',
        connectors=[],
        temperature=0.3,
    )

    output = ""
    for idx, response in enumerate(stream):
        if response.event_type == "text-generation":
            output += response.text
        if idx == 0:
            # Placeholder bot turn; overwritten in place as tokens stream in.
            history.append(" " + output)
        else:
            history[-1] = output
        # history alternates user/bot messages; pair them up for the Chatbot.
        chat = [
            (history[i].strip(), history[i + 1].strip())
            for i in range(0, len(history) - 1, 2)
        ]
        yield chat, history, cid


def clear_chat():
    """Reset chatbot display, history, conversation id, and extracted text."""
    return [], [], str(uuid.uuid4()), ""


def handle_unstructure_and_submit(url, user_message, cid, token, history):
    """Extract text from *url*, send it with *user_message* to Cohere.

    Returns (chat_pairs, history, cid, extracted_text). On fetch failure,
    returns well-typed empty values so the UI state stays consistent.
    """
    page_content = process_html_from_url(url)
    if page_content:
        chat = []  # Guard: stream could yield nothing, leaving `chat` unbound.
        for chat, _, _ in generate_response(user_message, page_content, cid, token, history):
            pass  # Drain the stream; only the final state is returned.
        return chat, history, cid, page_content
    # Failure: keep existing history/cid, clear extracted text.
    print("Failed to retrieve HTML content")
    return [], history, cid, ""


def continue_conversation(user_message, extracted_text, cid, token, history):
    """Continue the chat using the previously extracted page text.

    Returns (chat_pairs, history, cid); returns typed empties when no text
    has been extracted yet.
    """
    if extracted_text:
        chat = []  # Guard against an empty stream leaving `chat` unbound.
        for chat, _, _ in generate_response(user_message, extracted_text, cid, token, history):
            pass
        return chat, history, cid
    print("No text extracted to continue the conversation.")
    return [], history, cid


# Custom CSS for the Gradio app.
custom_css = """
#logo-img { display: block; margin-left: auto; margin-right: auto; width: 50%; }
#chatbot { font-size: 16px; min-height: 400px; }
#user-message { font-size: 16px; }
.center-text { text-align: center; font-family: Arial, sans-serif; }
.center-text h1 { font-size: 2em; font-weight: bold; }
.center-text p { font-size: 1.2em; font-weight: bold; }
"""

# Build the Gradio interface.
with gr.Blocks(analytics_enabled=False, css=custom_css) as demo:
    cid = gr.State("")
    token = gr.State(value=None)
    history = gr.State([])
    extracted_text = gr.State("")

    with gr.Row():
        gr.Markdown("""

Cohere Chatbot

Note: Aya 23 using Unstructured to extract text from web and process it.

Cohere Aya 23: Cohere for AI and Cohere

Unstructured: Open-Source Pre-Processing Tools for Unstructured Data

""")
    with gr.Row():
        url_input = gr.Textbox(placeholder="Enter URL ...", label="URL", show_label=False, elem_id="url-input")
    with gr.Row():
        chatbot = gr.Chatbot(elem_id="chatbot", show_label=False)
    with gr.Row():
        user_message = gr.Textbox(placeholder="Ask anything ...", label="Input", show_label=False, elem_id="user-message")
    with gr.Row():
        submit_button = gr.Button("Unstructure Text and Submit to Cohere")
        continue_button = gr.Button("Continue Conversation")
        clear_button = gr.Button("Clear chat")

    submit_button.click(
        fn=handle_unstructure_and_submit,
        inputs=[url_input, user_message, cid, token, history],
        outputs=[chatbot, history, cid, extracted_text],
        concurrency_limit=32,
    )
    continue_button.click(
        fn=continue_conversation,
        inputs=[user_message, extracted_text, cid, token, history],
        outputs=[chatbot, history, cid],
        concurrency_limit=32,
    )
    clear_button.click(
        fn=clear_chat,
        inputs=None,
        outputs=[chatbot, history, cid, extracted_text],
        concurrency_limit=32,
    )

    # With inputs=None Gradio invokes the fn with no arguments, so the
    # clear-textbox callbacks must take zero parameters.
    user_message.submit(lambda: gr.update(value=""), None, [user_message], queue=False)
    submit_button.click(lambda: gr.update(value=""), None, [user_message], queue=False)
    continue_button.click(lambda: gr.update(value=""), None, [user_message], queue=False)
    clear_button.click(lambda: gr.update(value=""), None, [user_message], queue=False)

    # Per-session random token, generated on page load.
    demo.load(lambda: secrets.token_hex(16), None, token)

if __name__ == "__main__":
    try:
        demo.queue(api_open=False, max_size=40).launch(show_api=False)
    except Exception as e:
        print(f"Error: {e}")