| import os | |
| import gradio as gr | |
| from openai import OpenAI | |
# Inference endpoint configuration, read from environment (Space secrets).
# Trailing "/" is stripped from the endpoint so f"{BASE}/v1" below is well-formed.
BASE = os.getenv("HF_ENDPOINT_URL", "").rstrip("/")
API_KEY = os.getenv("HF_TOKEN")
# Model identifier sent in each request; overridable via MODEL_ID env var.
MODEL_ID = os.getenv("MODEL_ID", "RedMod/mangrove_30b_a3b_sft_step_6000")
# Fail fast at import time: the app is unusable without both secrets.
if not BASE or not API_KEY:
    raise RuntimeError("Set HF_ENDPOINT_URL and HF_TOKEN in Settings → Repository secrets.")
# OpenAI-compatible client pointed at the HF endpoint's /v1 API surface.
client = OpenAI(base_url=f"{BASE}/v1", api_key=API_KEY)
def build_messages(history, user_msg, system_msg):
    """Build an OpenAI-style ``messages`` list from chat state.

    Args:
        history: Prior turns. Accepts either the legacy Gradio "tuples"
            format (``[(user, assistant), ...]``) or the "messages" format
            (``[{"role": ..., "content": ...}, ...]``) that newer
            ``gr.ChatInterface`` versions pass. Empty/None entries are
            skipped in both formats.
        user_msg: The new user message appended last.
        system_msg: Optional system prompt; prepended (stripped) when
            non-blank.

    Returns:
        list[dict]: Messages ready for ``chat.completions.create``.
    """
    msgs = []
    if system_msg and system_msg.strip():
        msgs.append({"role": "system", "content": system_msg.strip()})
    for item in history:
        if isinstance(item, dict):
            # Gradio "messages" format: already role/content dicts.
            role = item.get("role")
            content = item.get("content")
            if role in ("user", "assistant") and content:
                msgs.append({"role": role, "content": content})
        else:
            # Legacy "tuples" format: (user_text, assistant_text) pairs.
            u, a = item
            if u:
                msgs.append({"role": "user", "content": u})
            if a:
                msgs.append({"role": "assistant", "content": a})
    msgs.append({"role": "user", "content": user_msg})
    return msgs
def chat_fn(message, history, system_message, temperature, top_p, max_tokens):
    """Stream an assistant reply for the Gradio ChatInterface.

    Sends the accumulated conversation to the endpoint with streaming
    enabled, and yields the growing reply text after each received token
    so the UI updates incrementally.
    """
    request_messages = build_messages(history, message, system_message)
    response = client.chat.completions.create(
        model=MODEL_ID,
        messages=request_messages,
        temperature=float(temperature),
        top_p=float(top_p),
        max_tokens=int(max_tokens),
        stream=True,
    )
    pieces = []
    for event in response:
        delta = event.choices[0].delta
        # Some stream events (e.g. role header, finish) carry no text.
        if not (delta and delta.content):
            continue
        pieces.append(delta.content)
        yield "".join(pieces)
# UI layout: a system-prompt box and sampling sliders feed the chat handler
# as additional inputs alongside the message/history that ChatInterface
# supplies automatically.
with gr.Blocks(title="Mangrove Demo") as demo:
    system_box = gr.Textbox(
        label="System prompt",
        value="You are a helpful assistant.",
        lines=2,
    )
    with gr.Row():
        # Slider defaults mirror common sampling settings; values are passed
        # straight through to chat_fn (which casts them to float/int).
        temp = gr.Slider(0.0, 2.0, value=0.7, step=0.1, label="Temperature")
        topp = gr.Slider(0.0, 1.0, value=0.95, step=0.01, label="Top-p")
        maxt = gr.Slider(16, 4096, value=512, step=16, label="Max tokens")
    gr.ChatInterface(
        fn=chat_fn,
        # Order must match chat_fn's signature after (message, history).
        additional_inputs=[system_box, temp, topp, maxt],
        submit_btn="Send",
        stop_btn="Stop",
        multimodal=False,
    )
# Launch the app only when run as a script (Spaces also executes this path).
if __name__ == "__main__":
    demo.launch()