Spaces:

DarkEngineAI
/

REN_AI_DEMO

Runtime error

App Files Files

DarkEngineAI committed on May 19, 2024

Commit

1ed12f1

verified ·

1 Parent(s): 04646b7

Create app.py

Browse files

Files changed (1) hide show

app.py +158 -0

app.py ADDED Viewed

	@@ -0,0 +1,158 @@

+# ██████╗   █████╗  ██████╗  ██╗  ██╗
+# ██╔══██╗ ██╔══██╗ ██╔══██╗ ██║ ██╔╝
+# ██║  ██║ ███████║ ██████╔╝ █████╔╝
+# ██║  ██║ ██╔══██║ ██╔══██╗ ██╔═██╗
+# ██████╔╝ ██║  ██║ ██║  ██║ ██║  ██╗
+# ╚═════╝  ╚═╝  ╚═╝ ╚═╝  ╚═╝ ╚═╝  ╚═╝
+#
+# ███████╗ ███╗   ██╗  ██████╗  ████╗ ███╗   ██╗ ███████╗
+# ██╔════╝ ████╗  ██║ ██╔════╝   ██╔╝ ████╗  ██║ ██╔════╝
+# █████╗   ██╔██╗ ██║ ██║  ███╗  ██║  ██╔██╗ ██║ █████╗
+# ██╔══╝   ██║╚██╗██║ ██║   ██║  ██║  ██║╚██╗██║ ██╔══╝
+# ███████╗ ██║ ╚████║ ╚██████╔╝ ████╗ ██║ ╚████║ ███████╗
+# ╚══════╝ ╚═╝  ╚═══╝  ╚═════╝  ╚═══╝ ╚═╝  ╚═══╝ ╚══════╝
+# This is a demo for the REN-AI architecture that will be used in our upcoming products. Please visit darkengine.ai to learn more!
+import gradio as gr
+import os
+from groq import Groq
+from datetime import datetime
+import pytz
+# We use Groq for our API demo to showcase models that can run locally on your device (just faster with the LPU engine, if you have $20k you can do this speed locally lmao)
+# Local inference speeds will depend on your device (regarding the Dark Engine app or DarkOS for the REN-X3 robot)
# Credentials for the Groq API are read from the environment (None when unset).
api_key = os.environ.get("GROQ_API_KEY")
# We are still updating the final system prompt architecture that will be shared in the future. For now, we store it as a secret variable on HF Spaces.
system_prompt = os.environ.get("SYSTEM_PROMPT")
# Token budgets. These are rounded estimates rather than calculated values and can be updated easily.
MAX_TOKENS = 8 * 1024
RESPONSE_TOKENS = 1024
USER_INPUT_TOKENS = 250
TOKEN_THRESHOLD = 7 * 1000
+# Ren needs timestamps
def get_current_central_time():
    """Return the current time in the America/Chicago zone as 'YYYY-MM-DD HH:MM:SS'."""
    chicago_tz = pytz.timezone('America/Chicago')
    now_in_chicago = datetime.now(chicago_tz)
    return format(now_in_chicago, '%Y-%m-%d %H:%M:%S')
+# Ren needs to have weights initiated and then updated between interaction-states
def create_system_message(weights, short=False):
    """Build the system message that tells Ren its current weights and the time.

    Args:
        weights: Mapping with the keys 'personality', 'selfReflection',
            'abstractionLevel' and 'metaCognition'.
        short: When True, return a compact "weights changed" notice instead of
            the full system prompt.

    Returns:
        The system message text.

    Raises:
        RuntimeError: If the full prompt is requested but the SYSTEM_PROMPT
            template was not configured.
        KeyError: If a required weight is missing from ``weights``.
    """
    current_time = get_current_central_time()
    if short:
        return f"Updated weights: {weights}, Current time: {current_time}"
    if system_prompt is None:
        # Without this guard the failure is an opaque
        # "'NoneType' object has no attribute 'format'".
        raise RuntimeError(
            "SYSTEM_PROMPT is not set; configure it as an environment "
            "variable or secret before starting the app."
        )
    return system_prompt.format(
        personality=weights['personality'],
        selfReflection=weights['selfReflection'],
        abstractionLevel=weights['abstractionLevel'],
        metaCognition=weights['metaCognition'],
        current_time=current_time,
    )
def calculate_token_count(text):
    """Estimate the token count of ``text`` as its number of whitespace-separated words.

    This is a rough word count, not a real tokenizer count. ``None`` and the
    empty string count as zero so messages without content do not crash the
    budget calculation.
    """
    if not text:
        return 0
    return len(text.split())
def trim_conversation_history(conversation_history, max_tokens, count_tokens=None):
    """Drop old messages, in place, until the history fits within ``max_tokens``.

    The oldest non-system message is removed first so the system prompt
    survives trimming. The most recent message is never removed. System
    messages are only dropped (oldest first) once nothing else is left to
    remove.

    Args:
        conversation_history: List of message dicts with a 'content' key and,
            normally, a 'role' key. The list is mutated.
        max_tokens: Token budget for the whole history.
        count_tokens: Optional callable mapping message content to a token
            count. Defaults to ``calculate_token_count``.

    Returns:
        The same list object, trimmed.
    """
    counter = calculate_token_count if count_tokens is None else count_tokens
    total_tokens = sum(counter(entry['content']) for entry in conversation_history)
    while total_tokens > max_tokens and len(conversation_history) > 1:
        # Look at everything except the newest message; fall back to index 0
        # when only system messages remain ahead of it.
        drop_index = next(
            (
                index
                for index, entry in enumerate(conversation_history[:-1])
                if entry.get('role') != 'system'
            ),
            0,
        )
        removed_message = conversation_history.pop(drop_index)
        total_tokens -= counter(removed_message['content'])
    return conversation_history
# Replies produced by the app itself (not by the model). They are recognised
# when rebuilding the conversation so they are never sent back to the model.
_RESET_COMMAND = "reset"
_RESET_REPLY = "Conversation history has been reset."
_LIMIT_REPLY = "Message Limit Reached. Please type 'reset' to start another chat."
_ERROR_PREFIX = "An error occurred: "


def _is_canned_reply(content):
    """Return True when ``content`` is one of this app's own status replies."""
    return content in (_RESET_REPLY, _LIMIT_REPLY) or content.startswith(_ERROR_PREFIX)


def _history_to_messages(chat_history):
    """Convert the UI chat history into a list of user/assistant message dicts.

    Accepts both history shapes: a list of ``[user, assistant]`` pairs and a
    list of ``{"role": ..., "content": ...}`` dicts. Everything up to the last
    "reset" command is discarded, and turns that only produced a status reply
    (reset confirmation, limit notice, error) are skipped together with the
    user message that triggered them.
    """
    flat = []
    for entry in chat_history or []:
        if isinstance(entry, dict):
            flat.append((entry.get("role"), entry.get("content")))
        else:
            # Pair format: (user_text, assistant_text); zip tolerates short entries.
            flat.extend(zip(("user", "assistant"), tuple(entry)[:2]))

    messages = []
    for role, content in flat:
        if role not in ("user", "assistant") or not isinstance(content, str):
            continue
        if role == "user" and content.strip().lower() == _RESET_COMMAND:
            messages.clear()  # "reset" wipes everything said before it
            continue
        if role == "assistant" and _is_canned_reply(content):
            if messages and messages[-1]["role"] == "user":
                messages.pop()  # that user message was never answered by the model
            continue
        messages.append({"role": role, "content": content})
    return messages


async def predict(message, chat_history, personality, selfReflection, abstractionLevel, metaCognition):
    """Stream Ren's reply to ``message`` for the chat UI.

    The conversation sent to the model is rebuilt on every call from
    ``chat_history``, so the model remembers earlier turns and the "reset"
    command really clears them. Nothing is stored between calls.

    Args:
        message: The user's new message.
        chat_history: Previous turns as supplied by the chat UI.
        personality, selfReflection, abstractionLevel, metaCognition:
            Current slider values (0-100) used as Ren's weights.

    Yields:
        The accumulated reply text after each streamed chunk, or a single
        status/error string.
    """
    # Reset command for conversation.
    # Other commands can be added to expand features, running functions to handle specific tasks.
    if message.strip().lower() == _RESET_COMMAND:
        yield _RESET_REPLY
        return

    new_weights = {
        "personality": personality,
        "selfReflection": selfReflection,
        "abstractionLevel": abstractionLevel,
        "metaCognition": metaCognition,
    }
    # Slider defaults; a short notice is added whenever the user moved away from them.
    default_weights = dict.fromkeys(new_weights, 100)

    try:
        # The full system message always goes first.
        messages = [{"role": "system", "content": create_system_message(new_weights, short=False)}]
        messages.extend(_history_to_messages(chat_history))
        # As weights are updated, Ren should be notified.
        if new_weights != default_weights:
            messages.append({"role": "system", "content": create_system_message(new_weights, short=True)})
        messages.append({"role": "user", "content": message})

        total_tokens = sum(calculate_token_count(entry['content']) for entry in messages) + RESPONSE_TOKENS
        if total_tokens > TOKEN_THRESHOLD:
            yield _LIMIT_REPLY
            return
        # Safety net: only takes effect if TOKEN_THRESHOLD is raised above
        # the model's context budget.
        messages = trim_conversation_history(messages, MAX_TOKENS - RESPONSE_TOKENS)

        # I believe this should work with the OpenAI API with a little editing, as the Groq API is designed for easy migration.
        # The async client keeps the event loop free while chunks arrive.
        from groq import AsyncGroq

        client = AsyncGroq(api_key=api_key)
        stream = await client.chat.completions.create(
            messages=messages,
            # Try the larger or smaller Llama 3 model (these should work locally depending on your hardware).
            # The REN-AI architecture implemented in our prompt and functions can be used across models.
            model="llama3-70b-8192",
            temperature=0.4,
            max_tokens=RESPONSE_TOKENS,
            top_p=1,
            stop=None,
            stream=True,
        )
        response_accumulator = ""
        async for chunk in stream:
            if chunk.choices and chunk.choices[0].delta and chunk.choices[0].delta.content:
                response_accumulator += chunk.choices[0].delta.content
                yield response_accumulator
    except Exception as e:
        # Top-level boundary: show the failure in the chat instead of crashing.
        # Cancellation and generator close are BaseException and pass through.
        yield f"An error occurred: {str(e)}"
def _cognition_slider(label):
    """Build one cognition-weight slider ranging 0-100 and starting at 100."""
    return gr.Slider(minimum=0, maximum=100, value=100, label=label)


# One slider per weight that is passed to predict() as an additional input.
personality_slider = _cognition_slider("Personality")
self_reflection_slider = _cognition_slider("Self-Reflection")
abstraction_level_slider = _cognition_slider("Abstraction Level")
meta_cognition_slider = _cognition_slider("Meta-Cognition")

# Chat UI wired to predict(); the sliders appear in an open accordion below the chat.
iface = gr.ChatInterface(
    fn=predict,
    title="REN-AI DEMO | DARK ENGINE",
    description="Welcome to our limited demo | Learn more at [darkengine.ai](https://darkengine.ai)\n\nType 'reset' to remove error messages or delete AI memory",
    additional_inputs=[
        personality_slider,
        self_reflection_slider,
        abstraction_level_slider,
        meta_cognition_slider,
    ],
    additional_inputs_accordion=gr.Accordion(open=True, label="Cognition Settings"),
    theme="monochrome",
    css="footer{display:none !important}",
)

if __name__ == "__main__":
    iface.launch(show_api=False)