# ██████╗ █████╗ ██████╗ ██╗ ██╗
# ██╔══██╗ ██╔══██╗ ██╔══██╗ ██║ ██╔╝
# ██║ ██║ ███████║ ██████╔╝ █████╔╝
# ██║ ██║ ██╔══██║ ██╔══██╗ ██╔═██╗
# ██████╔╝ ██║ ██║ ██║ ██║ ██║ ██╗
# ╚═════╝ ╚═╝ ╚═╝ ╚═╝ ╚═╝ ╚═╝ ╚═╝
#
# ███████╗ ███╗ ██╗ ██████╗ ████╗ ███╗ ██╗ ███████╗
# ██╔════╝ ████╗ ██║ ██╔════╝ ██╔╝ ████╗ ██║ ██╔════╝
# █████╗ ██╔██╗ ██║ ██║ ███╗ ██║ ██╔██╗ ██║ █████╗
# ██╔══╝ ██║╚██╗██║ ██║ ██║ ██║ ██║╚██╗██║ ██╔══╝
# ███████╗ ██║ ╚████║ ╚██████╔╝ ████╗ ██║ ╚████║ ███████╗
# ╚══════╝ ╚═╝ ╚═══╝ ╚═════╝ ╚═══╝ ╚═╝ ╚═══╝ ╚══════╝
#
# This is a demo for the REN-AI architecture that will be used in our upcoming
# products. Please visit darkengine.ai to learn more!

# Standard library
import os
from datetime import datetime

# Third-party
import gradio as gr
import pytz
from groq import Groq

# We use Groq for our API demo to showcase models that can run locally on your
# device (just faster with the LPU engine, if you have $20k you can do this
# speed locally lmao).
# Local inference speeds will depend on your device (regarding the Dark Engine
# app or DarkOS for the REN-X3 robot).
api_key = os.getenv("GROQ_API_KEY")

# We are still updating the final system prompt architecture that will be
# shared in the future. For now, we store it as a secret variable on HF Spaces.
# NOTE(review): this is None when the secret is missing — downstream code must
# guard before calling .format() on it.
system_prompt = os.getenv("SYSTEM_PROMPT")

# Not calculated, just rounded. This can be updated easily...
# Token budget (rounded estimates for llama3-70b-8192, not exact counts).
MAX_TOKENS = 8192        # model context window
RESPONSE_TOKENS = 1024   # head-room reserved for the streamed reply
USER_INPUT_TOKENS = 250  # rough per-message allowance (informational)
TOKEN_THRESHOLD = 7000   # cutoff at which we ask the user to reset

# Module-level state so conversation memory and the last-seen cognition
# weights actually persist between chat turns, mimicking long-term memory
# with traditional storage until a vector-DB backend (Pinecone online /
# Weaviate local) is added.
# FIX: these previously lived inside predict() and were re-created on every
# call, so the model never saw earlier turns and 'reset' cleared an
# already-empty list.
# NOTE(review): module state is shared by every visitor of the Space — a
# per-session store (e.g. gr.State) is the intended upgrade.
DEFAULT_WEIGHTS = {
    "personality": 100,
    "selfReflection": 100,
    "abstractionLevel": 100,
    "metaCognition": 100,
}
conversation_history = []
current_weights = dict(DEFAULT_WEIGHTS)


# Ren needs timestamps
def get_current_central_time():
    """Return the current US Central time as a 'YYYY-MM-DD HH:MM:SS' string."""
    central = pytz.timezone('America/Chicago')
    return datetime.now(central).strftime('%Y-%m-%d %H:%M:%S')


# Ren needs to have weights initiated and then updated between interaction-states
def create_system_message(weights, short=False):
    """Build a system message embedding the cognition weights and a timestamp.

    With short=True, returns a compact "weights updated" notice; otherwise the
    full SYSTEM_PROMPT template is formatted with the weights.
    Raises RuntimeError when the SYSTEM_PROMPT secret is missing (instead of
    the opaque AttributeError that None.format(...) would raise).
    """
    current_time = get_current_central_time()
    if short:
        return f"Updated weights: {weights}, Current time: {current_time}"
    if system_prompt is None:
        raise RuntimeError("SYSTEM_PROMPT environment variable is not set")
    return system_prompt.format(
        personality=weights['personality'],
        selfReflection=weights['selfReflection'],
        abstractionLevel=weights['abstractionLevel'],
        metaCognition=weights['metaCognition'],
        current_time=current_time,
    )


def calculate_token_count(text):
    """Rough token estimate: whitespace word count (not a real tokenizer)."""
    return len(text.split())


def trim_conversation_history(history, max_tokens):
    """Evict oldest messages in place until the estimate fits max_tokens.

    FIX: the leading system (persona) message is never evicted — the original
    always popped index 0 first, which could silently drop the persona prompt.
    Returns the same (mutated) list for convenience.
    """
    total_tokens = sum(calculate_token_count(m['content']) for m in history)
    while total_tokens > max_tokens and len(history) > 1:
        # Skip over a persona message sitting at the head of the list.
        evict_at = 1 if history[0]['role'] == 'system' else 0
        removed = history.pop(evict_at)
        total_tokens -= calculate_token_count(removed['content'])
    return history


async def predict(message, chat_history, personality, selfReflection, abstractionLevel, metaCognition):
    """Stream a model reply, maintaining persistent conversation state.

    Gradio ChatInterface callback: yields growing partial responses while the
    Groq stream arrives. Type 'reset' to wipe memory; other commands can be
    added here to expand features, running functions for specific tasks.
    """
    global conversation_history, current_weights

    # Reset command for conversation.
    if message.strip().lower() == "reset":
        conversation_history = []
        current_weights = dict(DEFAULT_WEIGHTS)
        yield "Conversation history has been reset."
        return

    # As weights are updated via the sliders, Ren should be notified.
    new_weights = {
        "personality": personality,
        "selfReflection": selfReflection,
        "abstractionLevel": abstractionLevel,
        "metaCognition": metaCognition,
    }

    if not conversation_history:
        # Add the full system message only once at the beginning; it already
        # embeds the current weights, so record them as seen (avoids a
        # redundant "updated weights" notice on the very first turn).
        conversation_history.append(
            {"role": "system", "content": create_system_message(new_weights, short=False)}
        )
        current_weights = new_weights
    if new_weights != current_weights:
        current_weights = new_weights
        conversation_history.append(
            {"role": "system", "content": create_system_message(new_weights, short=True)}
        )

    conversation_history.append({"role": "user", "content": message})

    total_tokens = sum(
        calculate_token_count(m['content']) for m in conversation_history
    ) + RESPONSE_TOKENS
    if total_tokens > TOKEN_THRESHOLD:
        yield "Message Limit Reached. Please type 'reset' to start another chat."
        return

    conversation_history = trim_conversation_history(
        conversation_history, MAX_TOKENS - RESPONSE_TOKENS
    )

    # Groq's API mirrors OpenAI's, so this should work with the OpenAI API
    # with a little editing — Groq is just lightning fast and handles
    # threaded swarms better for online apps.
    client = Groq(api_key=api_key)
    response_accumulator = ""
    try:
        stream = client.chat.completions.create(
            messages=conversation_history,
            # Try the larger or smaller llama3 model (these should work
            # locally depending on your hardware) — we just like llama3
            # better for the demo. The REN-AI architecture implemented in our
            # prompt and functions here can be used across models, and we
            # intend to use fine-tuned or custom AI in future systems.
            model="llama3-70b-8192",
            temperature=0.4,
            max_tokens=1024,
            top_p=1,
            stop=None,
            stream=True,
        )
        for chunk in stream:
            if chunk.choices and chunk.choices[0].delta and chunk.choices[0].delta.content:
                response_accumulator += chunk.choices[0].delta.content
                yield response_accumulator
        conversation_history.append({"role": "assistant", "content": response_accumulator})
    except Exception as e:
        # Surface the failure in-chat rather than crashing the Space.
        yield f"An error occurred: {str(e)}"


personality_slider = gr.Slider(minimum=0, maximum=100, value=100, label="Personality")
self_reflection_slider = gr.Slider(minimum=0, maximum=100, value=100, label="Self-Reflection")
abstraction_level_slider = gr.Slider(minimum=0, maximum=100, value=100, label="Abstraction Level")
meta_cognition_slider = gr.Slider(minimum=0, maximum=100, value=100, label="Meta-Cognition")

iface = gr.ChatInterface(
    fn=predict,
    title="REN-AI DEMO | DARK ENGINE",
    description="Welcome to our limited demo | Learn more at [darkengine.ai](https://darkengine.ai)\n\nType 'reset' to remove error messages or delete AI memory",
    additional_inputs=[personality_slider, self_reflection_slider, abstraction_level_slider, meta_cognition_slider],
    additional_inputs_accordion=gr.Accordion(open=True, label="Cognition Settings"),
    theme="monochrome",
    css="footer{display:none !important}",
)

if __name__ == "__main__":
    iface.launch(show_api=False)