File size: 3,401 Bytes
53f2650
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
484fee4
53f2650
 
 
 
 
 
 
 
 
 
 
 
 
 
b2253f4
484fee4
 
 
 
 
 
53f2650
 
 
 
 
 
 
 
 
 
 
 
 
 
 
582770b
53f2650
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
582770b
 
4169774
 
53f2650
 
 
 
 
 
0365300
53f2650
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import openai
import gradio as gr
import os
import numpy as np

# Ensure you have set your OpenAI API key as an environment variable.
# NOTE(review): this module-level assignment is the legacy (pre-1.0) style;
# the OpenAI() client below presumably also picks up OPENAI_API_KEY from the
# environment on its own — confirm against the installed openai version.
openai.api_key = os.getenv("OPENAI_API_KEY")

# Shared API client used by compute_logprobs for completion requests.
client = openai.OpenAI()

def logprob_to_percentage(logprob):
    """Convert a natural-log probability into a percentage in [0, 100]."""
    return 100 * np.exp(logprob)

def highlight_tokens_with_cumulative(cumulative, token, colors, color_idx):
    """Render the accumulated prefix in muted grey, followed by *token*
    (via repr, so whitespace stays visible) on a coloured background."""
    background = colors[color_idx]
    token_html = (
        "<span style='background-color: " + background + "; color: black;'>"
        + repr(token)
        + "</span>"
    )
    prefix_html = "<span class='muted'>" + cumulative + "</span>"
    return prefix_html + " " + token_html

def compute_logprobs(input_text):
    """Render next-token probability tables for every prefix of *input_text*.

    For each whitespace-delimited prefix of the input — processed from the
    full sentence down to the first word — the OpenAI completions endpoint
    is queried for the top-5 next-token log-probabilities, which are then
    formatted as one HTML table per prefix.

    Args:
        input_text: The sentence typed by the user.

    Returns:
        An HTML string containing one table per prefix, or an HTML error
        notice if the API request fails. (Previously the error path
        returned None, which left the Gradio HTML output silently blank.)
    """
    output_html = """
        <style>
            table { border-collapse: collapse; width: auto; margin-top: 10px; margin-left: 0; }
            td { text-align: left; padding: 4px; }
            .muted { color: #888; }
        </style>
        <div>
        """

    # Single highlight colour for now; the index cycling below keeps the
    # code ready for a multi-colour palette.
    colors = ["#CCCCCC"]

    # Split once instead of re-splitting the input on every iteration.
    words = input_text.split()

    try:
        # Walk the prefixes from the full sentence down to the first word.
        for i in range(len(words), 0, -1):
            sub_prompt = " ".join(words[:i])
            response = client.completions.create(
                model="gpt-3.5-turbo-instruct",
                prompt=sub_prompt,
                max_tokens=1,
                logprobs=5,
            )

            # First (and only) completion choice for this prefix.
            choice = response.choices[0]

            # Table without headers, one row per candidate token.
            table_html = "<table><tbody>"

            # top_logprobs[0] maps candidate token -> logprob for the
            # single generated position.
            top_predictions = choice.logprobs.top_logprobs[0].items()
            color_idx = 0
            for token, logprob in top_predictions:
                probability = logprob_to_percentage(logprob)
                combined = highlight_tokens_with_cumulative(sub_prompt, token, colors, color_idx)
                table_html += f"<tr><td>{combined}</td><td>{probability:.2f}%</td></tr>"
                color_idx = (color_idx + 1) % len(colors)

            table_html += "</tbody></table>"

            # Append results for the current step.
            output_html += f"<div><strong>Eingabe: {sub_prompt}</strong></div>"
            output_html += table_html
            output_html += "<br/>"

        output_html += "</div>"
        return output_html

    except Exception as e:
        # Surface the failure in the UI instead of swallowing it and
        # returning None; keep the console log for debugging.
        print(f"Error: {e}")
        return f"<div>Error: {e}</div>"


# Set up Gradio using Blocks for flexibility with custom theme.
# UI text is German: "token prediction probabilities" demo.
with gr.Blocks() as demo:
    gr.Markdown("## Wahrscheinlichkeiten von Token-Vorhersagen")
    gr.Markdown("Satz eingeben um die Wahrscheinlichkeiten der jeweils nächsten Token zu generieren")
    
    # Single-line textbox; every change triggers a recomputation below.
    input_box = gr.Textbox(label="", placeholder="Etwas tippen...", lines=1)
    output_html = gr.HTML(label="Wahrscheinlichkeiten")
    
    # Recompute on every keystroke — NOTE(review): this fires one API call
    # per word per change event; consider a submit button or debounce.
    input_box.change(compute_logprobs, inputs=input_box, outputs=output_html)

# Launch the Gradio app (blocking call).
demo.launch()