MIST-Mini-8B / app.py
olumideola's picture
Update app.py
78fcccd verified
import gradio as gr
import os
import time
from openai import OpenAI, APIError
# Identifier of the model requested from Featherless on every completion call.
model_id = "olaverse/MIST-Mini-8B"

# Read the API key from the environment; this is None when the variable is unset.
FEATHERLESS_API_KEY = os.getenv("FEATHERLESS_API_KEY")

# OpenAI-compatible client pointed at the Featherless endpoint.
client = OpenAI(
    base_url="https://api.featherless.ai/v1",
    api_key=FEATHERLESS_API_KEY,
)
# --- Smart Parser to prevent Dictionary Hallucinations ---
def parse_text(item):
    """Extract plain text from a loosely-typed chat payload.

    Handles the shapes this app may receive for a message or system prompt:

    * ``str`` -- returned unchanged.
    * ``dict`` -- the value under ``"text"`` (preferred) or ``"content"`` is
      extracted recursively; a dict with neither key is stringified.
    * ``list`` / ``tuple`` -- the first element is extracted recursively.
    * ``None`` or an empty list/tuple -- yields ``""``.
    * anything else -- ``str(item)``.

    The result is always a ``str``.
    """
    # None and empty sequences carry no text. Returning "" (instead of the
    # truthy strings "None" / "[]") lets callers detect "nothing was supplied".
    if item is None:
        return ""
    if isinstance(item, str):
        return item
    if isinstance(item, dict):
        for key in ("text", "content"):
            if key in item:
                # Recurse so nested payloads (e.g. a list of content parts
                # under "content") are reduced to a string as well.
                return parse_text(item[key])
        return str(item)
    if isinstance(item, (list, tuple)):
        if not item:
            return ""
        return parse_text(item[0])
    return str(item)
def _text_or_empty(value):
    """Return ``parse_text(value)``, mapping ``None`` to an empty string."""
    return "" if value is None else parse_text(value)


def _history_to_messages(history):
    """Convert chat history entries into ``{"role", "content"}`` message dicts.

    Entries may be dicts (``role`` / ``content``) or list/tuple pairs of
    ``(user, assistant)``. Entries of any other type are ignored.
    """
    messages = []
    for interaction in history or []:
        if isinstance(interaction, dict):
            messages.append({
                "role": interaction.get("role", "user"),
                "content": _text_or_empty(interaction.get("content", "")),
            })
        elif isinstance(interaction, (list, tuple)):
            # Pad with None so a short entry cannot raise IndexError; only the
            # first two items (user turn, assistant turn) are used.
            user_turn, bot_turn = (tuple(interaction) + (None, None))[:2]
            messages.append({"role": "user", "content": _text_or_empty(user_turn)})
            messages.append({"role": "assistant", "content": _text_or_empty(bot_turn)})
    return messages


def respond(message, history, system_message, max_tokens, temperature, top_p, repetition_penalty):
    """Stream a chat completion for ``message`` from the Featherless endpoint.

    This is a generator: it yields the response text accumulated so far each
    time a new non-empty content delta arrives.

    Args:
        message: Current user input (any shape ``parse_text`` accepts).
        history: Previous turns, as message dicts or (user, assistant) pairs.
        system_message: System prompt; a built-in default is used when empty.
        max_tokens: Upper bound on generated tokens (coerced to ``int``).
        temperature: Sampling temperature; values <= 0 are replaced by 1.0.
        top_p: Nucleus-sampling threshold.
        repetition_penalty: UI value, sent as ``frequency_penalty`` after
            subtracting 1.0 and clamping at 0.0.

    Yields:
        The cumulative response text, a "retrying" notice when an ``APIError``
        mentioning "capacity" is retried (up to 3 attempts, waiting 5s then
        10s), or an error string for any other/final ``APIError``.
    """
    print(f"DEBUG: rep_penalty={repetition_penalty}, max_tokens={max_tokens}, temp={temperature}")

    # Fall back to the default prompt when no system message text is supplied.
    sys_msg_text = _text_or_empty(system_message)
    if not sys_msg_text:
        sys_msg_text = "You are MIST, a highly capable AI assistant. Be concise and helpful."

    messages = [{"role": "system", "content": sys_msg_text}]
    messages.extend(_history_to_messages(history))
    messages.append({"role": "user", "content": _text_or_empty(message)})

    # Convert once, before comparing, so the check and the sent value agree.
    temp_value = float(temperature)

    max_attempts = 3
    for attempt in range(max_attempts):
        try:
            stream = client.chat.completions.create(
                model=model_id,
                messages=messages,
                # UI widgets may hand over a float; the token limit is an integer.
                max_tokens=int(max_tokens),
                temperature=temp_value if temp_value > 0 else 1.0,
                top_p=float(top_p),
                frequency_penalty=max(0.0, float(repetition_penalty) - 1.0),  # map 1.0–2.0 → 0.0–1.0
                stream=True,
            )
            try:
                response = ""
                for chunk in stream:
                    # Skip keep-alive / metadata chunks with no choices.
                    if not chunk.choices:
                        continue
                    delta = chunk.choices[0].delta.content
                    if delta:
                        response += delta
                        yield response
            finally:
                # Release the HTTP connection even if the consumer stops the
                # generator early or the stream fails part-way through.
                stream.close()
            return  # success — exit retry loop
        except APIError as e:
            if "capacity" in str(e).lower() and attempt < max_attempts - 1:
                wait = 5 * (attempt + 1)
                yield f"⚠️ Model is at capacity, retrying in {wait}s… (attempt {attempt + 1}/{max_attempts})"
                time.sleep(wait)
            else:
                yield f"❌ Featherless API error: {e}"
                return
# Chat UI wired to `respond`. The additional inputs are listed in the same
# order as respond's extra parameters: system_message, max_tokens,
# temperature, top_p, repetition_penalty.
chatbot = gr.ChatInterface(
    fn=respond,
    title="MIST-Mini-8B (Featherless)",
    description="MIST-Mini-8B served via Featherless AI.",
    additional_inputs=[
        gr.Textbox(
            label="System message",
            value="You are MIST, a highly capable AI assistant. Be concise and helpful.",
        ),
        gr.Slider(label="Max new tokens", minimum=1, maximum=4096, step=1, value=1024),
        gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.7),
        gr.Slider(label="Top-p", minimum=0.1, maximum=1.0, step=0.05, value=0.95),
        gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.5),
    ],
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    chatbot.launch(ssr_mode=False)