File size: 6,144 Bytes
7673c2b
 
3d50e48
06443cb
ff349dd
 
 
7673c2b
 
 
ff349dd
7673c2b
 
 
 
625ee44
7673c2b
 
3d50e48
 
 
06443cb
 
 
1700027
 
 
 
7673c2b
 
 
fe005c6
1700027
7673c2b
06443cb
 
1700027
7673c2b
 
bd54f7b
7673c2b
c13df5d
625ee44
7673c2b
 
 
 
1700027
7673c2b
 
 
1700027
7673c2b
06443cb
 
1700027
7673c2b
 
 
 
3d50e48
06443cb
 
3d50e48
7673c2b
06443cb
 
1700027
 
45e8e5a
1700027
 
 
 
06443cb
 
 
 
 
1700027
7673c2b
1700027
 
bd54f7b
1700027
c13df5d
45e8e5a
1700027
 
 
 
 
 
 
 
 
 
06443cb
 
1700027
 
 
 
 
 
 
3d50e48
7673c2b
06443cb
 
 
 
 
 
 
 
ec0dea5
06443cb
 
 
 
 
 
7673c2b
 
 
16a932c
 
 
06443cb
1700027
16a932c
ec0dea5
1700027
16a932c
 
1700027
16a932c
 
1700027
16a932c
 
 
1700027
16a932c
 
7673c2b
16a932c
3d50e48
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
from groq import Groq
import gradio as gr
import time
import threading
import os

# API key is read from the environment; None here means the Groq client
# construction below will fail at first API call — TODO confirm the
# "TriviaVilla" env var is set in the deployment environment.
api_key = os.getenv("TriviaVilla")

# Initialize the Groq client with your API key
client = Groq(
    api_key= api_key
)

# List to maintain the conversation history, starting with a default prompt.
# NOTE: this is shared, process-wide state — all users of the app share one
# conversation. Entry 0 is the system prompt and is never removed.
conversation_history = [
    {"role": "system", "content": "You are an experienced assistant in a trivia game focused on Nigerian music. With over 40 years of keeping track of Nigerian Music, You have great knowledge of the Nigerian Music Industry"}
]

# Variable to track the last interaction time (epoch seconds); refreshed on
# every user turn and polled by the reset_session watchdog thread.
last_interaction_time = time.time()

# Lock to synchronize access to shared variables (conversation_history and
# last_interaction_time) between the request handlers and the watchdog thread.
lock = threading.Lock()

# Function to count tokens (approximation)
def count_tokens(messages):
    """Approximate the total token count of a list of chat messages.

    Uses whitespace-separated word count of each message's ``content`` as a
    cheap stand-in for real tokenization — an underestimate for most BPE
    tokenizers, but good enough for rough budget trimming.
    """
    total = 0
    for entry in messages:
        total += len(entry["content"].split())
    return total

# Function to get the initial LLM output and start the conversation
def start_trivia_game():
    """Kick off the trivia session and return the model's opening message.

    Appends the game-setup prompt to the shared ``conversation_history``,
    streams the model's reply, records it in the history, and returns the
    reply text.

    Returns:
        str: the assistant's opening message, or a human-readable error
        string if the API call fails (mirrors the error handling in
        ``continue_trivia_game`` so a transient failure at startup does not
        crash the whole app at import time).
    """
    # Initial message to start the game
    initial_message = "Start a Trivia on Nigerian music. Welcome the user and tell the user to type Start to start. Tell the user to type End to finish the game. Make your questions always concise, no irrelevant text. Expect the precise answer as text or the alphabet of the option from users as the answer. Make 'None of the Above' an option available so that if the answer to the question is not in the options, users can choose 'None of the above'. Assess the answers and provide the percentage score based on the present and previous scores and present the next question to keep it going. The highest percentage is 100% and no percentage assessment should be greater than this."

    # Add the initial message to the conversation history
    with lock:
        conversation_history.append({"role": "user", "content": initial_message})

    try:
        # Get completion from the LLM for the initial question
        completion = client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=conversation_history,
            temperature=0.3,
            max_tokens=4096,
            top_p=1,
            stream=True,
            stop=None,
        )

        # Accumulate the streamed chunks into a single string.
        llm_output = ""
        for chunk in completion:
            llm_output += chunk.choices[0].delta.content or ""

        # Add the assistant's response to the conversation history
        with lock:
            conversation_history.append({"role": "assistant", "content": llm_output})

        return llm_output
    except Exception as e:
        # Same failure messages as continue_trivia_game, for consistency.
        if "rate_limit_exceeded" in str(e):
            return "You've reached the maximum number of requests. Please wait a few minutes before trying again."
        else:
            return f"An error occurred. Try again in 10 minutes: {str(e)}"

# Function to handle user response and continue the conversation
def continue_trivia_game(user_response):
    """Process one user turn and return the assistant's streamed reply.

    Records the user's message, trims old turns if the history exceeds the
    token budget, calls the LLM, records its reply, and returns the reply
    text (or a human-readable error string on API failure).

    Args:
        user_response: the raw text the user typed into the UI.

    Returns:
        str: the assistant's reply, or an error message.
    """
    global last_interaction_time
    with lock:
        # Refresh the inactivity timestamp read by the reset_session watchdog.
        last_interaction_time = time.time()

        # Add user's response to the conversation history
        conversation_history.append({"role": "user", "content": user_response})

        # Token limit management: drop the oldest user/assistant pair while
        # over budget. Index 0 is the system prompt and is never removed.
        # BUGFIX: the original loop never terminated when the history was
        # already down to <= 2 messages but still over the limit (nothing was
        # popped, so the count never changed) — break out in that case.
        max_tokens = 8000  # Maximum token limit for the LLM (example value)
        while count_tokens(conversation_history) > max_tokens:
            if len(conversation_history) <= 2:
                break
            conversation_history.pop(1)  # oldest user message after system prompt
            conversation_history.pop(1)  # its corresponding assistant response

    # Get completion from the LLM for the user's response
    try:
        completion = client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=conversation_history,
            temperature=0.3,
            max_tokens=8000,
            top_p=1,
            stream=True,
            stop=None,
        )

        # Accumulate the streamed chunks into a single string.
        llm_output = ""
        for chunk in completion:
            llm_output += chunk.choices[0].delta.content or ""

        # Add the assistant's response to the conversation history
        with lock:
            conversation_history.append({"role": "assistant", "content": llm_output})

        return llm_output
    except Exception as e:
        # Check for specific rate limit error
        if "rate_limit_exceeded" in str(e):
            return "You've reached the maximum number of requests. Please wait a few minutes before trying again."
        else:
            return f"An error occurred. Try again in 10 minutes: {str(e)}"

# Function to reset the session after inactivity
def reset_session():
    """Background watchdog: wipe the shared conversation after inactivity.

    Runs forever (intended for a daemon thread). Every poll interval it
    checks how long it has been since the last user interaction and, past
    the idle threshold, rebinds ``conversation_history`` to a fresh list
    containing only the system prompt.
    """
    global conversation_history
    poll_interval = 10   # seconds between checks
    idle_threshold = 180  # 3 minutes of inactivity
    while True:
        time.sleep(poll_interval)
        with lock:
            idle_for = time.time() - last_interaction_time
            if idle_for > idle_threshold:
                conversation_history = [
                    {"role": "system", "content": "You are an experienced assistant in a trivia game focused on Nigerian music. With over 40 years of keeping track of Nigerian Music, You have great knowledge of the Nigerian Music Industry"}
                ]

# Start the background thread for session reset. Daemon=True so the thread
# does not block process exit.
reset_thread = threading.Thread(target=reset_session, daemon=True)
reset_thread.start()

# Start the game and get the initial LLM output. NOTE: this makes a network
# call at import time; the result seeds the UI textbox below.
initial_output = start_trivia_game()

# Using gr.Blocks to create the interface
# Build and launch the Gradio UI.
with gr.Blocks() as demo:
    # Title and Description.
    # BUGFIX: the description previously said "LLama 3.1" but the code calls
    # the llama-3.3-70b-versatile model — corrected to match.
    gr.Markdown("# TriviaVilla\n How much do you know about the Nigerian Music Industry? Here is a Trivia to test your knowledge. Developed using Llama 3.3 LLM. This model may hallucinate sometimes, but you can guide it through your prompt.")

    # LLM Output Textbox, pre-filled with the opening message from start_trivia_game().
    llm_output = gr.Textbox(label="LLM Output", placeholder="", lines=10, value=initial_output)

    # User Response Textbox
    user_response = gr.Textbox(label="Your Response", placeholder="Type your response here", lines=3)

    # Button to submit the user's response and update the LLM output
    submit_button = gr.Button("Submit")

    # Thin adapter so the click handler has the (inputs) -> output shape
    # Gradio expects; all game logic lives in continue_trivia_game.
    def update_llm_output(user_input):
        return continue_trivia_game(user_input)

    # Wire the button: send the user's text, replace the output textbox.
    submit_button.click(fn=update_llm_output, inputs=user_response, outputs=llm_output)

# Launch the Gradio app
demo.launch()