"""Gradio chat front-end for MIST-Mini-8B served via the Featherless API.

The module builds an OpenAI-compatible client pointed at Featherless,
defines a streaming ``respond`` callback, and wires it into a
``gr.ChatInterface`` with sliders for the sampling parameters.
"""

import os
import time

import gradio as gr
from openai import APIError, OpenAI

model_id = "olaverse/MIST-Mini-8B"
FEATHERLESS_API_KEY = os.environ.get("FEATHERLESS_API_KEY")

# NOTE(review): if FEATHERLESS_API_KEY is unset this passes api_key=None;
# confirm how the installed openai client behaves in that case.
client = OpenAI(
    api_key=FEATHERLESS_API_KEY,
    base_url="https://api.featherless.ai/v1",
)

# Prompt used when the UI supplies an empty system message; also the
# initial value of the "System message" textbox.
_DEFAULT_SYSTEM_MESSAGE = (
    "You are MIST, a highly capable AI assistant. Be concise and helpful."
)

# Total number of request attempts made when the API reports a capacity error.
_MAX_ATTEMPTS = 3


# --- Smart Parser to prevent Dictionary Hallucinations ---
def parse_text(item):
    """Reduce a message payload to plain text.

    Handles the shapes this file may receive for a message: a plain string,
    a dict carrying a ``"text"`` or ``"content"`` key, or a list/tuple whose
    first element holds the text. Nested payloads are unwrapped recursively
    so the result is always a ``str``.

    Args:
        item: The payload to flatten. ``None`` and empty sequences are
            treated as "no text".

    Returns:
        The extracted text, ``""`` when there is none, or ``str(item)`` for
        any unrecognised value.
    """
    if item is None:
        # str(None) would send the literal word "None" to the model.
        return ""
    if isinstance(item, str):
        return item
    if isinstance(item, dict):
        for key in ("text", "content"):
            if key in item:
                # Recurse: the value may itself be a list/dict/None.
                return parse_text(item[key])
        return str(item)
    if isinstance(item, (list, tuple)):
        # Only the first element is inspected; an empty sequence has no text.
        return parse_text(item[0]) if item else ""
    return str(item)


def _history_to_messages(history):
    """Convert chat history into a list of role/content message dicts.

    Dict entries are read through their ``"role"`` and ``"content"`` keys.
    List/tuple entries are read as ``(user, assistant)`` pairs; a missing
    or ``None`` side becomes an empty string. Other entry types are ignored.
    """
    converted = []
    for interaction in history or []:
        if isinstance(interaction, dict):
            converted.append({
                "role": interaction.get("role", "user"),
                "content": parse_text(interaction.get("content", "")),
            })
        elif isinstance(interaction, (list, tuple)):
            # Pad so a short entry cannot raise IndexError.
            padded = tuple(interaction) + (None, None)
            converted.append({"role": "user", "content": parse_text(padded[0])})
            converted.append(
                {"role": "assistant", "content": parse_text(padded[1])}
            )
    return converted


def respond(message, history, system_message, max_tokens, temperature, top_p,
            repetition_penalty):
    """Stream a chat completion for the current conversation.

    Args:
        message: The new user message (string or structured payload).
        history: Prior turns, as dicts with ``role``/``content`` or as
            ``(user, assistant)`` pairs.
        system_message: System prompt; a default is used when it is empty.
        max_tokens: Completion token limit, coerced to ``int``.
        temperature: Sampling temperature; values ``<= 0`` fall back to 1.0.
        top_p: Nucleus sampling value, coerced to ``float``.
        repetition_penalty: Value from the 1.0-2.0 slider, sent to the API
            as ``frequency_penalty`` after subtracting 1.0 (floored at 0.0).

    Yields:
        The accumulated response text after each streamed chunk, or a
        status/error string when the API call fails.
    """
    print(f"DEBUG: rep_penalty={repetition_penalty}, max_tokens={max_tokens}, temp={temperature}")

    # Safely extract system message
    sys_msg_text = parse_text(system_message) or _DEFAULT_SYSTEM_MESSAGE

    messages = [{"role": "system", "content": sys_msg_text}]

    # Safely unpack history
    messages.extend(_history_to_messages(history))

    # Safely extract current user message
    messages.append({"role": "user", "content": parse_text(message)})

    # Coerce slider values once, outside the retry loop.
    sampling_temperature = float(temperature)
    if sampling_temperature <= 0:
        sampling_temperature = 1.0
    token_limit = int(max_tokens)
    # map 1.0–2.0 → 0.0–1.0
    frequency_penalty = max(0.0, float(repetition_penalty) - 1.0)

    # Stream response from Featherless (retry up to 3 times on capacity errors)
    for attempt in range(_MAX_ATTEMPTS):
        try:
            stream = client.chat.completions.create(
                model=model_id,
                messages=messages,
                max_tokens=token_limit,
                temperature=sampling_temperature,
                top_p=float(top_p),
                frequency_penalty=frequency_penalty,
                stream=True,
            )

            response = ""
            for chunk in stream:
                # Skip keep-alive / metadata chunks with no choices
                if not chunk.choices:
                    continue
                delta = chunk.choices[0].delta.content
                if delta:
                    response += delta
                    yield response
            return  # success — exit retry loop

        except APIError as e:
            is_capacity_error = "capacity" in str(e).lower()
            if is_capacity_error and attempt < _MAX_ATTEMPTS - 1:
                # Linear back-off: 5s, then 10s.
                wait = 5 * (attempt + 1)
                yield f"⚠️ Model is at capacity, retrying in {wait}s… (attempt {attempt + 1}/3)"
                time.sleep(wait)
            else:
                yield f"❌ Featherless API error: {e}"
                return


# Chat UI: the additional inputs are passed to `respond` positionally after
# (message, history), in the order listed here.
chatbot = gr.ChatInterface(
    respond,
    title="MIST-Mini-8B (Featherless)",
    description="MIST-Mini-8B served via Featherless AI.",
    additional_inputs=[
        gr.Textbox(
            value=_DEFAULT_SYSTEM_MESSAGE,
            label="System message",
        ),
        gr.Slider(minimum=1, maximum=4096, value=1024, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
        gr.Slider(minimum=1.0, maximum=2.0, value=1.5, step=0.05, label="Repetition penalty"),
    ],
)

if __name__ == "__main__":
    chatbot.launch(ssr_mode=False)