Spaces:
Running
Running
| import gradio as gr | |
| import os | |
| import time | |
| from openai import OpenAI, APIError | |
# Identifier of the model requested from the Featherless endpoint.
model_id = "olaverse/MIST-Mini-8B"

# The key is read from the environment; os.getenv yields None when the
# variable is not set.
# NOTE(review): confirm how the OpenAI client behaves when api_key is None.
FEATHERLESS_API_KEY = os.getenv("FEATHERLESS_API_KEY")

# OpenAI SDK client pointed at the Featherless base URL.
client = OpenAI(
    base_url="https://api.featherless.ai/v1",
    api_key=FEATHERLESS_API_KEY,
)
| # --- Smart Parser to prevent Dictionary Hallucinations --- | |
def parse_text(item):
    """Reduce a chat payload to a plain string.

    Accepted shapes:
      * ``str``          -> returned unchanged.
      * ``None``         -> ``""`` (never the literal ``"None"``).
      * ``dict``         -> the value under ``"text"`` if present, otherwise
                            the value under ``"content"``, itself normalized
                            recursively; a dict with neither key is
                            stringified with ``str()``.
      * ``list``/``tuple`` -> the first element, normalized recursively;
                            an empty sequence yields ``""``.
      * anything else    -> ``str(item)``.

    The result is always a ``str``, so callers can place it directly in a
    message's ``content`` field and rely on truthiness to detect "no text".
    """
    if item is None:
        return ""
    if isinstance(item, str):
        return item
    if isinstance(item, dict):
        # "text" takes priority over "content", as in the original lookup order.
        for key in ("text", "content"):
            if key in item:
                # Recurse: the value may itself be a list/dict of parts.
                return parse_text(item[key])
        return str(item)
    if isinstance(item, (list, tuple)):
        # Only the first element is used; an empty sequence carries no text.
        return parse_text(item[0]) if item else ""
    return str(item)
def _build_messages(message, history, system_message):
    """Assemble the chat-completions message list for one request.

    The list starts with a system message (falling back to a built-in prompt
    when ``system_message`` is empty), continues with the prior turns from
    ``history`` and ends with the current user ``message``.

    ``history`` entries may be dicts carrying ``role``/``content`` keys or
    ``(user, assistant)`` pairs; entries of any other type are ignored.
    ``None`` values are treated as empty text rather than being stringified.
    """
    default_prompt = "You are MIST, a highly capable AI assistant. Be concise and helpful."

    def as_text(value):
        # None means "no text here"; everything else goes through parse_text.
        return parse_text(value) if value is not None else ""

    messages = [{"role": "system", "content": as_text(system_message) or default_prompt}]

    for turn in history or []:
        if isinstance(turn, dict):
            messages.append({
                "role": turn.get("role", "user"),
                "content": as_text(turn.get("content", "")),
            })
        elif isinstance(turn, (list, tuple)):
            # Pad with None so a short/malformed pair cannot raise IndexError.
            user_part, bot_part = (list(turn) + [None, None])[:2]
            # Both sides are always emitted (possibly empty) so the
            # user/assistant alternation of the conversation is preserved.
            messages.append({"role": "user", "content": as_text(user_part)})
            messages.append({"role": "assistant", "content": as_text(bot_part)})

    messages.append({"role": "user", "content": as_text(message)})
    return messages


def respond(message, history, system_message, max_tokens, temperature, top_p, repetition_penalty):
    """Stream a chat completion for the current conversation.

    Args:
        message: Current user input (string or a structure parse_text accepts).
        history: Prior turns, as role/content dicts or (user, assistant) pairs;
            may be None or empty.
        system_message: System prompt; a built-in default is used when empty.
        max_tokens: Upper bound on generated tokens (coerced to int).
        temperature: Sampling temperature; values <= 0 fall back to 1.0.
        top_p: Nucleus-sampling probability mass.
        repetition_penalty: UI-level penalty; ``penalty - 1.0`` (floored at 0)
            is sent as ``frequency_penalty``.

    Yields:
        The accumulated response text after every received content chunk.
        On an API error whose text mentions "capacity", a retry notice is
        yielded and the request is retried after a linearly growing wait;
        any other API error (or the final failed attempt) yields an error
        message and ends the stream.
    """
    print(f"DEBUG: rep_penalty={repetition_penalty}, max_tokens={max_tokens}, temp={temperature}")

    messages = _build_messages(message, history, system_message)

    max_attempts = 3  # total tries, including the first one
    temp_value = float(temperature)
    request_kwargs = {
        "model": model_id,
        "messages": messages,
        # UI widgets may deliver whole numbers as floats; the API expects an int.
        "max_tokens": int(max_tokens),
        "temperature": temp_value if temp_value > 0 else 1.0,
        "top_p": float(top_p),
        # Shift the penalty down by 1.0 so that 1.0 ("no penalty") maps to 0.0.
        "frequency_penalty": max(0.0, float(repetition_penalty) - 1.0),
        "stream": True,
    }

    for attempt in range(1, max_attempts + 1):
        try:
            stream = client.chat.completions.create(**request_kwargs)
            response = ""
            for chunk in stream:
                # Skip keep-alive / metadata chunks with no choices.
                if not chunk.choices:
                    continue
                delta = chunk.choices[0].delta.content
                if delta:
                    response += delta
                    yield response
            return  # success: leave the retry loop
        except APIError as e:
            retryable = "capacity" in str(e).lower() and attempt < max_attempts
            if not retryable:
                yield f"❌ Featherless API error: {e}"
                return
            wait = 5 * attempt
            yield f"⚠️ Model is at capacity, retrying in {wait}s… (attempt {attempt}/{max_attempts})"
            time.sleep(wait)
# Chat UI wired to respond(). The additional inputs are listed in the same
# order as respond()'s parameters after (message, history): system message,
# max tokens, temperature, top-p, repetition penalty.
chatbot = gr.ChatInterface(
    fn=respond,
    title="MIST-Mini-8B (Featherless)",
    description="MIST-Mini-8B served via Featherless AI.",
    additional_inputs=[
        gr.Textbox(
            label="System message",
            value="You are MIST, a highly capable AI assistant. Be concise and helpful.",
        ),
        gr.Slider(label="Max new tokens", minimum=1, maximum=4096, step=1, value=1024),
        gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.7),
        gr.Slider(label="Top-p", minimum=0.1, maximum=1.0, step=0.05, value=0.95),
        gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.5),
    ],
)


# Launch the app only when executed as a script, with ssr_mode turned off.
if __name__ == "__main__":
    chatbot.launch(ssr_mode=False)