File size: 5,529 Bytes
d3a1ab7
 
7fa8202
d3a1ab7
 
 
 
 
 
 
 
 
 
 
 
50ea8e2
d3a1ab7
 
 
541988e
 
 
 
 
 
 
ed28f2f
d3a1ab7
 
ed28f2f
d3a1ab7
 
 
 
 
541988e
d3a1ab7
 
ed28f2f
 
d3a1ab7
ed28f2f
d3a1ab7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
541988e
d3a1ab7
f0d2e1a
d3a1ab7
65a04ae
 
ed28f2f
d3a1ab7
65a04ae
 
f0d2e1a
d3a1ab7
f0d2e1a
d3a1ab7
541988e
 
 
 
 
 
 
 
 
d3a1ab7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f0d2e1a
 
 
 
 
 
 
 
 
d3a1ab7
 
 
 
 
 
 
 
541988e
d3a1ab7
 
 
 
f0d2e1a
 
 
 
d3a1ab7
 
 
 
 
 
f0d2e1a
 
 
d3a1ab7
65a04ae
d3a1ab7
 
65a04ae
541988e
ed28f2f
d3a1ab7
541988e
 
f0d2e1a
d3a1ab7
 
 
541988e
d3a1ab7
 
 
 
 
 
 
 
50ea8e2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import gradio as gr
import cohere
import os
import uuid
import secrets
import nltk
from unstructured.documents.html import HTMLDocument
import requests
from bs4 import BeautifulSoup

# Download and install NLTK data at import time
# (tokenizer / POS-tagger models; NOTE(review): presumably needed by the
# unstructured/bs4 text pipeline -- runs on every startup, consider caching)
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

# Initialize Cohere client with API key
# COHERE_API_KEY must be set in the environment; client_name tags requests
# from this deployment for Cohere-side attribution.
co = cohere.Client(os.getenv("COHERE_API_KEY"), client_name="huggingface-aya-23")

# Function to process HTML content from a given URL
def process_html_from_url(url):
    """Fetch *url* and return its visible text content, or None on failure.

    Parameters:
        url: the page to download.

    Returns:
        The page's text (tags stripped via BeautifulSoup) on success,
        None on any request error (logged to stdout).
    """
    try:
        # Timeout guards against a stalled server hanging the Gradio worker
        # forever; the original call had no timeout and could block indefinitely.
        response = requests.get(url, timeout=15)
        response.raise_for_status()  # Raise an HTTPError for bad responses (4xx and 5xx)
        soup = BeautifulSoup(response.text, 'html.parser')
        return soup.get_text()
    except requests.RequestException as e:
        # Timeout/ConnectionError/HTTPError all derive from RequestException.
        print(f"Error retrieving HTML content: {e}")
        return None

# Function to generate response using Cohere chatbot
def generate_response(user_message, extracted_text, cid, token, history=None):
    """Stream a chat reply from Cohere, yielding incremental UI updates.

    Parameters:
        user_message: the user's question.
        extracted_text: page text prepended to the message as context
            (the full text is resent with every turn).
        cid: conversation id; a fresh UUID is minted when empty/falsy.
        token: session token issued by demo.load; absence means the page
            did not finish loading, so we abort with a UI error.
        history: flat list alternating user/bot messages, mutated in place.

    Yields:
        (chat, history, cid) after each stream event, where chat is a list
        of (user, bot) string tuples in the shape gr.Chatbot expects.

    Raises:
        gr.Error: when no session token is present.
    """
    if not token:
        raise gr.Error("Error loading.")
        
    if history is None:
        history = []
    if not cid:
        cid = str(uuid.uuid4())

    # NOTE(review): very long pages may exceed the model context window
    # since extracted_text is included verbatim -- consider truncation.
    combined_message = f"{extracted_text}\n\n{user_message}"
    history.append(combined_message)
    
    stream = co.chat_stream(message=combined_message, conversation_id=cid, model='c4ai-aya-23', connectors=[], temperature=0.3)
    output = ""
    
    for idx, response in enumerate(stream):
        if response.event_type == "text-generation":
            output += response.text
        if idx == 0:
            # First event: append a slot for the bot reply. The leading
            # space keeps the entry non-empty; it is stripped for display.
            history.append(" " + output)
        else:
            # Later events: overwrite the bot slot with the accumulated text.
            history[-1] = output
        # Re-pair flat [user, bot, user, bot, ...] history into tuples.
        chat = [
            (history[i].strip(), history[i + 1].strip())
            for i in range(0, len(history) - 1, 2)
        ] 
        yield chat, history, cid

# Function to clear chat
def clear_chat():
    """Reset the UI state.

    Returns a 4-tuple: empty chatbot display, empty history list, a fresh
    conversation id, and an empty extracted-text buffer.
    """
    fresh_conversation_id = str(uuid.uuid4())
    return [], [], fresh_conversation_id, ""

# Function to handle URL input, unstructure the text, and submit to Cohere
def handle_unstructure_and_submit(url, user_message, cid, token, history):
    """Fetch *url*, extract its text, and run one full chat turn over it.

    Returns (chat, history, cid, extracted_text) on success, or an error
    message with empty placeholders when the page cannot be retrieved.

    Fixes over the original:
    - `chat` is pre-initialized so an empty stream no longer raises
      UnboundLocalError at the return statement.
    - The yielded history/cid are captured instead of discarded, so a
      conversation id minted inside generate_response is persisted.
    """
    page_content = process_html_from_url(url)
    if not page_content:
        return "Failed to retrieve HTML content", "", "", ""
    chat = []  # fallback if the generator yields nothing
    # Drain the streaming generator; keep only the final state.
    for chat, history, cid in generate_response(user_message, page_content, cid, token, history):
        pass
    return chat, history, cid, page_content

# Function to continue the conversation using the last extracted text
def continue_conversation(user_message, extracted_text, cid, token, history):
    """Run another chat turn against the previously extracted page text.

    Returns (chat, history, cid), or an error message with empty
    placeholders when no text has been extracted yet.

    Fixes over the original:
    - `chat` is pre-initialized so an empty stream no longer raises
      UnboundLocalError at the return statement.
    - The yielded history/cid are captured instead of discarded, so a
      conversation id minted inside generate_response is persisted.
    """
    if not extracted_text:
        return "No text extracted to continue the conversation.", "", ""
    chat = []  # fallback if the generator yields nothing
    # Drain the streaming generator; keep only the final state.
    for chat, history, cid in generate_response(user_message, extracted_text, cid, token, history):
        pass
    return chat, history, cid

# Custom CSS for the Gradio app: centers the logo and header text and
# enlarges fonts for the chatbot display and the user input box.
custom_css = """
#logo-img {
    display: block;
    margin-left: auto;
    margin-right: auto;
    width: 50%;
}
#chatbot {
    font-size: 16px;
    min-height: 400px;
}
#user-message {
    font-size: 16px;
}
.center-text {
    text-align: center;
    font-family: Arial, sans-serif;
}
.center-text h1 {
    font-size: 2em;
    font-weight: bold;
}
.center-text p {
    font-size: 1.2em;
    font-weight: bold;
}
"""

# Create Gradio interface
with gr.Blocks(analytics_enabled=False, css=custom_css) as demo:
    # Per-session state: conversation id, page-load token, flat chat
    # history, and the text last extracted from a URL.
    cid = gr.State("")
    token = gr.State(value=None)
    history = gr.State([])
    extracted_text = gr.State("")

    with gr.Row():
        gr.Markdown("""
        <div class="center-text">
        <h1>Cohere Chatbot</h1>
        <p><strong>Note</strong>: Aya 23 using Unstructured to extract text from web and process it.</p>
        <p><strong>Cohere Aya 23</strong>: <a href="https://cohere.com/research" target="_blank">Cohere for AI</a> and <a href="https://cohere.com/" target="_blank">Cohere</a></p>
        <p><strong>Unstructured</strong>: Open-Source Pre-Processing Tools for Unstructured Data</p>
        </div>
        """)

    with gr.Row():
        url_input = gr.Textbox(placeholder="Enter URL ...", label="URL", show_label=False, elem_id="url-input")

    with gr.Row():
        chatbot = gr.Chatbot(elem_id="chatbot", show_label=False)

    with gr.Row():
        user_message = gr.Textbox(placeholder="Ask anything ...", label="Input", show_label=False, elem_id="user-message")

    with gr.Row():
        submit_button = gr.Button("Unstructure Text and Submit to Cohere")
        continue_button = gr.Button("Continue Conversation")
        clear_button = gr.Button("Clear chat")

    submit_button.click(fn=handle_unstructure_and_submit, inputs=[url_input, user_message, cid, token, history], outputs=[chatbot, history, cid, extracted_text], concurrency_limit=32)
    continue_button.click(fn=continue_conversation, inputs=[user_message, extracted_text, cid, token, history], outputs=[chatbot, history, cid], concurrency_limit=32)
    clear_button.click(fn=clear_chat, inputs=None, outputs=[chatbot, history, cid, extracted_text], concurrency_limit=32)

    # Clear the input textbox after each action. With inputs=None Gradio
    # invokes the callback with ZERO arguments, so the lambdas must take
    # none (the original one-argument lambdas raised TypeError at runtime).
    user_message.submit(lambda: gr.update(value=""), None, [user_message], queue=False)
    submit_button.click(lambda: gr.update(value=""), None, [user_message], queue=False)
    continue_button.click(lambda: gr.update(value=""), None, [user_message], queue=False)
    clear_button.click(lambda: gr.update(value=""), None, [user_message], queue=False)

    # Issue a random session token on page load; generate_response refuses
    # to run without it.
    demo.load(lambda: secrets.token_hex(16), None, token)

# Script entry point: enable queuing (bounded backlog, private API) and
# launch the app, reporting any launch failure instead of crashing.
if __name__ == "__main__":
    try:
        queued_app = demo.queue(api_open=False, max_size=40)
        queued_app.launch(show_api=False)
    except Exception as e:
        print(f"Error: {e}")