"""Gradio chat UI for the Featherless AI "Xortron" model.

Streams chat completions from the Featherless OpenAI-compatible endpoint
and, when the package is installed, traces every model call through the
Langfuse drop-in OpenAI client.
"""

import os

import gradio as gr
from dotenv import load_dotenv

load_dotenv()

# --- LANGFUSE SETUP ---
# Prefer the Langfuse drop-in OpenAI client: it automatically traces all
# model calls. Fall back to the plain OpenAI client when Langfuse is absent.
# (No @observe decorator on the handler — it conflicts with Gradio.)
try:
    from langfuse.openai import OpenAI
    print("✅ SUCCESS: Langfuse OpenAI client loaded.")
    LANGFUSE_ACTIVE = True
except ImportError as e:
    print(f"⚠️ WARNING: Langfuse not found ({e}).")
    print("ℹ️ FALLBACK: Switching to standard OpenAI.")
    from openai import OpenAI
    LANGFUSE_ACTIVE = False
# ----------------------

# The system prompt is kept out of the source tree (secret env var).
SYSTEM_PROMPT = os.getenv("XTRNPMT")
API_BASE_URL = "https://api.featherless.ai/v1"
FEATHERLESS_API_KEY = os.getenv("FEATHERLESS_API_KEY")
FEATHERLESS_MODEL = "darkc0de/XortronCriminalComputingConfig"

if not FEATHERLESS_API_KEY:
    print("WARNING: FEATHERLESS_API_KEY environment variable is not set.")

try:
    if not FEATHERLESS_API_KEY:
        raise ValueError(
            "FEATHERLESS_API_KEY is not set. Please set it as an environment"
            " variable or a secret in your deployment environment."
        )
    # Client initialization.
    # If Langfuse is active, this client automatically logs to Langfuse.
    client = OpenAI(
        base_url=API_BASE_URL,
        api_key=FEATHERLESS_API_KEY,
    )
    print(f"OpenAI client initialized with base_url: {API_BASE_URL} for Featherless AI, model: {FEATHERLESS_MODEL}")
except Exception as e:
    print(f"Error initializing OpenAI client with base_url '{API_BASE_URL}': {e}")
    raise RuntimeError(
        "Could not initialize OpenAI client. "
        f"Please check the API base URL ('{API_BASE_URL}'), your Featherless AI API key, model ID, "
        f"and ensure the server is accessible. Original error: {e}"
    )


def respond(message, history):
    """Stream a model reply for *message* given the chat *history*.

    Parameters
    ----------
    message : str
        The newest user message.
    history : list[dict]
        Gradio "messages"-format history:
        ``[{"role": "user"|"assistant", "content": "..."}, ...]``.

    Yields
    ------
    str
        The accumulated response text so far (Gradio streaming contract),
        or a human-readable error message if inference fails.
    """
    # 32k tokens is roughly 128,000 characters. We cap the context at
    # 100,000 characters (~25k tokens) to safely leave ~7k tokens for the
    # model's response generation.
    MAX_CONTEXT_CHARS = 100000

    messages = [{"role": "system", "content": SYSTEM_PROMPT or ""}]

    # 1. Work out how many characters remain for the chat history after
    #    accounting for the system prompt and the new user message.
    system_chars = len(SYSTEM_PROMPT or "")
    message_chars = len(message or "")
    allowed_history_chars = MAX_CONTEXT_CHARS - system_chars - message_chars

    # 2. Walk the history backwards so we keep only the most recent turns
    #    that still fit inside the character budget.
    recent_history = []
    current_hist_chars = 0
    for msg in reversed(history):
        content = msg.get("content", "") or ""
        role = msg.get("role", "user")
        turn_chars = len(content)
        # Drop this turn (and everything older) once the budget is exceeded.
        if current_hist_chars + turn_chars > allowed_history_chars:
            break
        recent_history.insert(0, {"role": role, "content": content})
        current_hist_chars += turn_chars

    # 3. Append the filtered history and the newest user message.
    messages.extend(recent_history)
    messages.append({"role": "user", "content": message})

    response_text = ""
    try:
        # Optional: name the trace when the Langfuse client is in use.
        kwargs = {}
        if LANGFUSE_ACTIVE:
            kwargs["name"] = "featherless-generation"

        stream = client.chat.completions.create(
            messages=messages,
            model=FEATHERLESS_MODEL,
            temperature=0.7,
            top_p=0.95,
            frequency_penalty=0.1,
            presence_penalty=0,
            stream=True,
            **kwargs,
        )
        for chunk in stream:
            # Guard: some chunks carry no choices or an empty delta.
            if chunk.choices and len(chunk.choices) > 0:
                delta = chunk.choices[0].delta
                if hasattr(delta, "content") and delta.content is not None:
                    response_text += delta.content
                    yield response_text
    except Exception as e:
        error_message = f"An error occurred during model inference with Featherless AI: {e}"
        print(error_message)
        yield error_message


# Extra <head> markup injected into the page (currently empty placeholder).
kofi_script = """ """

# Clickable footer shown under the chat window.
footer_image_html = """
Support Xortron on Ko-fi
"""

custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Orbitron:wght@400;700&display=swap');
body, .gradio-container { font-family: 'Orbitron', sans-serif !important; }
.gr-button { font-family: 'Orbitron', sans-serif !important; }
.gr-input { font-family: 'Orbitron', sans-serif !important; }
.gr-label { font-family: 'Orbitron', sans-serif !important; }
.gr-chatbot .message { font-family: 'Orbitron', sans-serif !important; }
/* --- HIDE THE HUGGING FACE SPACES HEADER --- */
#huggingface-spaces-header, #spaces-header, spaces-header, .spaces-header { display: none !important; }
"""

# BUG FIX: `theme`, `head`, and `css` are gr.Blocks() constructor arguments,
# not launch() arguments — passing them to launch() raises TypeError.
with gr.Blocks(
    title="XORTRON",
    theme="Nymbo/Nymbo_Theme",
    head=kofi_script,
    css=custom_css,
) as demo:
    gr.ChatInterface(
        fn=respond,  # Handler invoked for every user message.
        chatbot=gr.Chatbot(
            height=800,  # Height of the chat history display in pixels.
            label="XORTRON - Criminal Computing",
        ),
    )
    # Clickable footer image below the chat window.
    gr.HTML(footer_image_html)


if __name__ == "__main__":
    if not FEATHERLESS_API_KEY:
        print("\nCRITICAL ERROR: FEATHERLESS_API_KEY is not set.")
        print("Please ensure it's set as a secret in your Hugging Face Space settings or as an environment variable.\n")
    try:
        demo.queue(default_concurrency_limit=2)
        demo.launch(share=False)
    except NameError as ne:
        print(f"Gradio demo could not be launched. 'client' might not have been initialized: {ne}")
    except RuntimeError as re:
        print(f"Gradio demo could not be launched due to an error during client initialization: {re}")
    except Exception as e:
        print(f"An unexpected error occurred when trying to launch Gradio demo: {e}")