"""Fireworks Vision Studio - Chat interface for Document Intelligence & Design to Code."""

import base64
import contextlib
import io
import os
import tempfile
import time
import uuid

import fitz  # PyMuPDF
import gradio as gr
from gradio_client import utils as client_utils
import modelscope_studio.components.antd as antd
import modelscope_studio.components.antdx as antdx
import modelscope_studio.components.base as ms
import modelscope_studio.components.pro as pro
from modelscope_studio.components.pro.chatbot import (
    ChatbotActionConfig,
    ChatbotBotConfig,
    ChatbotMarkdownConfig,
    ChatbotUserConfig,
    ChatbotWelcomeConfig,
)
from modelscope_studio.components.pro.multimodal_input import (
    MultimodalInputUploadConfig,
)
from openai import OpenAI
from PIL import Image

try:
    from dotenv import load_dotenv

    load_dotenv()
except ImportError:
    # python-dotenv is optional; fall back to the process environment.
    pass

# ---------------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------------

MODELS = {
    "kimi-k2p5": {
        "label": "Kimi K2.5",
        "id": "accounts/fireworks/models/kimi-k2p5",
        "icon": "./assets/kimi-logo.png",
    },
    "qwen3-vl-30b": {
        "label": "Qwen3 VL 30B",
        "id": "accounts/fireworks/models/qwen3-vl-30b-a3b-instruct",
        "icon": "./assets/qwen-logo.png",
    },
}
DEFAULT_MODEL_KEY = "kimi-k2p5"

MAX_IMAGE_DIM = 2048  # longest image side (pixels) sent to the model
MAX_PDF_PAGES = 5  # only the first N pages of a PDF are rendered
PDF_DPI = 200  # rasterisation resolution for PDF pages
MAX_MESSAGES_PER_CHAT = 10  # session-wide cap tracked in state["total_messages"]

DEFAULT_SYS_PROMPT = (
    "You are Fireworks Vision Studio, a highly capable multimodal AI assistant. "
    "You excel at document analysis (legal, medical, financial) and converting "
    "designs/wireframes into code. Be thorough, accurate, and well-structured. "
    "Use markdown formatting for clarity. When generating HTML/CSS, return a "
    "single self-contained file wrapped in a ```html code fence. "
    "When responding, focus primarily on addressing the user's most recent message. "
    "Use the conversation history as context to understand the thread, but "
    "prioritize answering the latest question or request."
)

DEFAULT_THEME = {
    "token": {
        "colorPrimary": "#6d28d9",
    }
}


def get_client() -> OpenAI:
    """Build an OpenAI-compatible client pointed at the Fireworks inference API.

    Raises:
        ValueError: if ``FIREWORKS_API_KEY`` is unset or empty.
    """
    api_key = os.environ.get("FIREWORKS_API_KEY", "")
    if not api_key:
        raise ValueError(
            "FIREWORKS_API_KEY environment variable is not set. "
            "Please set your API key before running the app."
        )
    return OpenAI(
        base_url="https://api.fireworks.ai/inference/v1",
        api_key=api_key,
    )


# ---------------------------------------------------------------------------
# Image / PDF utilities
# ---------------------------------------------------------------------------


def encode_file_to_base64(file_path: str) -> str:
    """Encode a file to a base64 data URI, resizing images if needed.

    Images whose longest side exceeds ``MAX_IMAGE_DIM`` are downscaled and
    re-encoded as PNG entirely in memory, so no temporary file is left behind.
    Every other file is embedded unchanged.
    """
    mime_type = client_utils.get_mimetype(file_path)

    if mime_type and mime_type.startswith("image"):
        # The context manager releases the underlying file handle promptly.
        with Image.open(file_path) as img:
            if max(img.size) > MAX_IMAGE_DIM:
                img.thumbnail((MAX_IMAGE_DIM, MAX_IMAGE_DIM), Image.LANCZOS)
                buffer = io.BytesIO()
                img.save(buffer, "PNG")
                b64 = base64.b64encode(buffer.getvalue()).decode("utf-8")
                return f"data:image/png;base64,{b64}"

    with open(file_path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode("utf-8")
    # Avoid emitting the literal string "None" when the type cannot be guessed.
    return f"data:{mime_type or 'application/octet-stream'};base64,{b64}"


def pdf_to_images(pdf_path: str) -> list[str]:
    """Convert PDF pages to temporary PNG files via PyMuPDF.

    At most ``MAX_PDF_PAGES`` pages are rendered, at ``PDF_DPI``.

    Returns:
        Paths of the rendered PNG files. The caller owns these files and is
        responsible for deleting them.
    """
    zoom = PDF_DPI / 72  # PDF user space is 72 units per inch
    matrix = fitz.Matrix(zoom, zoom)
    paths = []
    doc = fitz.open(pdf_path)
    try:
        for page_number, page in enumerate(doc):
            if page_number >= MAX_PDF_PAGES:
                break
            pix = page.get_pixmap(matrix=matrix)
            # Reserve a unique path, then close our handle before PyMuPDF
            # writes to it (an open handle blocks the write on some platforms).
            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
                tmp_path = tmp.name
            pix.save(tmp_path)
            paths.append(tmp_path)
    finally:
        doc.close()
    return paths


# ---------------------------------------------------------------------------
# Message formatting
# ---------------------------------------------------------------------------


def _image_part(url: str) -> dict:
    """Wrap a data URI or http(s) URL as an OpenAI ``image_url`` content part."""
    return {"type": "image_url", "image_url": {"url": url}}


def _file_to_image_parts(file_path: str) -> list[dict]:
    """Turn one attachment reference into zero or more image content parts.

    Local PDFs become one part per rendered page, local images one part,
    ``http...`` references are passed through as URLs, and anything else is
    ignored.
    """
    if os.path.exists(file_path):
        mime_type = client_utils.get_mimetype(file_path)
        if mime_type == "application/pdf":
            page_paths = pdf_to_images(file_path)
            try:
                return [_image_part(encode_file_to_base64(p)) for p in page_paths]
            finally:
                # The rendered pages are only needed until they are encoded.
                for page_path in page_paths:
                    with contextlib.suppress(OSError):
                        os.remove(page_path)
        if mime_type and mime_type.startswith("image"):
            return [_image_part(encode_file_to_base64(file_path))]
        return []
    if file_path.startswith("http"):
        return [_image_part(file_path)]
    return []


def format_history(history):
    """Convert chatbot history to OpenAI-compatible messages.

    The system prompt is always the first message. User items are expected to
    hold a file entry at ``content[0]`` and a text entry at ``content[1]``
    (the shape written by ``Events.add_message``); assistant items contribute
    their first text entry, or an empty string when they have none.
    """
    messages = [{"role": "system", "content": DEFAULT_SYS_PROMPT}]
    for item in history:
        role = item["role"]
        if role == "user":
            parts = []
            for file_path in item["content"][0]["content"]:
                parts.extend(_file_to_image_parts(file_path))
            text = item["content"][1]["content"]
            messages.append(
                {"role": "user", "content": parts + [{"type": "text", "text": text}]}
            )
        elif role == "assistant":
            text_items = [c for c in item["content"] if c.get("type") == "text"]
            messages.append(
                {
                    "role": "assistant",
                    "content": text_items[0]["content"] if text_items else "",
                }
            )
    return messages


# ---------------------------------------------------------------------------
# UI config helpers
# ---------------------------------------------------------------------------


def user_config(disabled_actions=None):
    """Return the chatbot configuration for user messages (copy/edit/delete)."""
    return ChatbotUserConfig(
        class_names=dict(content="user-message-content"),
        actions=[
            "copy",
            "edit",
            ChatbotActionConfig(
                action="delete",
                popconfirm=dict(
                    title="Delete message",
                    description="Are you sure you want to delete this message?",
                    okButtonProps=dict(danger=True),
                ),
            ),
        ],
        disabled_actions=disabled_actions,
    )


def bot_config(disabled_actions=None, model_key=DEFAULT_MODEL_KEY):
    """Return the chatbot configuration for assistant messages.

    The avatar is the icon of the model selected by ``model_key``.
    """
    model = MODELS[model_key]
    return ChatbotBotConfig(
        actions=[
            "copy",
            "edit",
            ChatbotActionConfig(
                action="retry",
                popconfirm=dict(
                    title="Regenerate message",
                    description="This will also delete all subsequent messages.",
                    okButtonProps=dict(danger=True),
                ),
            ),
            ChatbotActionConfig(
                action="delete",
                popconfirm=dict(
                    title="Delete message",
                    description="Are you sure you want to delete this message?",
                    okButtonProps=dict(danger=True),
                ),
            ),
        ],
        avatar=model["icon"],
        disabled_actions=disabled_actions,
    )


def welcome_config(model_key=DEFAULT_MODEL_KEY):
    """Return the welcome screen (title, description, starter prompts)."""
    model = MODELS[model_key]
    return ChatbotWelcomeConfig(
        variant="borderless",
        icon=model["icon"],
        title=f"Hello, I'm {model['label']}",
        description="Upload images or PDFs and start a conversation. I specialize in document analysis and converting designs to code.",
        prompts=dict(
            title="How can I help you today?",
            styles={
                "list": {"width": "100%"},
                "item": {"flex": 1},
            },
            items=[
                {
                    "label": "📄 Document Intelligence",
                    "children": [
                        {
                            "description": "Extract all parties, dates, and key obligations from this contract.",
                        },
                        {
                            "description": "Summarize this medical report. List diagnoses, medications, and follow-up actions.",
                        },
                        {
                            "description": "Extract all line items and totals from this invoice into a markdown table.",
                        },
                    ],
                },
                {
                    "label": "🎨 Design to Code",
                    "children": [
                        {
                            "description": "Convert this wireframe into a responsive HTML page with a modern look.",
                        },
                        {
                            "description": "Build this as a landing page with hero section, features grid, and footer using HTML and CSS.",
                        },
                        {
                            "description": "Create a responsive dashboard layout matching this sketch with HTML, CSS, and inline JS.",
                        },
                    ],
                },
            ],
        ),
    )


def markdown_config():
    """Return the default markdown rendering configuration."""
    return ChatbotMarkdownConfig()


def upload_config():
    """Return the upload configuration: images and PDFs only."""
    return MultimodalInputUploadConfig(
        accept="image/*,.pdf",
        placeholder={
            "inline": {
                "title": "Upload files",
                "description": "Click or drag files to upload images or PDFs",
            },
            "drop": {
                "title": "Drop files here",
            },
        },
    )


# ---------------------------------------------------------------------------
# Event handlers
# ---------------------------------------------------------------------------


def _chat_state_update(history, state_value):
    """Build the output dict that refreshes the chatbot and the session state."""
    return {
        chatbot: gr.update(value=history),
        state: gr.update(value=state_value),
    }


class Events:
    """Gradio event handlers.

    The handlers reference UI components (``chatbot``, ``state``, ``input``,
    ...) that are created at module level further down; they are resolved at
    call time, after the UI has been built.
    """

    @staticmethod
    def submit(state_value):
        """Stream the assistant reply for the active conversation.

        Appends a pending assistant message to the history and yields an
        update dict after every received chunk. Failures are written into the
        assistant bubble; anything other than a missing API key is re-raised
        afterwards.
        """
        history = state_value["conversation_contexts"][
            state_value["conversation_id"]
        ]["history"]
        model_key = state_value.get("model_key", DEFAULT_MODEL_KEY)
        model_info = MODELS[model_key]

        history.append(
            {
                "role": "assistant",
                "content": [],
                "key": str(uuid.uuid4()),
                "loading": True,
                "header": model_info["label"],
                "status": "pending",
            }
        )
        reply = history[-1]
        yield _chat_state_update(history, state_value)

        try:
            client = get_client()
        except ValueError as e:
            # A missing API key is a configuration problem: report it in the
            # chat and stop without raising.
            reply["loading"] = False
            reply["status"] = "done"
            reply["content"] = [{"type": "text", "content": f"⚠️ {e}"}]
            yield _chat_state_update(history, state_value)
            return

        try:
            # Exclude the pending placeholder appended above. Formatting runs
            # inside the try so that unreadable uploads are reported in the
            # chat bubble as well.
            messages = format_history(history[:-1])
            response = client.chat.completions.create(
                model=model_info["id"],
                messages=messages,
                stream=True,
                max_tokens=4096,
            )
            start_time = time.time()
            answer_content = ""
            for chunk in response:
                # Stream chunks may carry an empty ``choices`` list.
                if not chunk or not chunk.choices:
                    continue
                delta = chunk.choices[0].delta
                piece = delta.content if delta else None
                if not piece:
                    continue
                answer_content += piece
                reply["content"] = [{"type": "text", "content": answer_content}]
                reply["loading"] = False
                yield _chat_state_update(history, state_value)

            # Also clears the spinner when the stream produced no text at all.
            reply["loading"] = False
            reply["status"] = "done"
            cost_time = f"{time.time() - start_time:.2f}"
            reply["footer"] = f"{cost_time}s"
            yield _chat_state_update(history, state_value)
        except Exception as e:
            reply["loading"] = False
            reply["status"] = "done"
            reply["content"] = [{"type": "text", "content": f"❌ Error: {e}"}]
            yield _chat_state_update(history, state_value)
            raise

    @staticmethod
    def add_message(input_value, state_value):
        """Handle a submitted user message and stream the assistant reply.

        Enforces the session-wide message limit, creates a conversation when
        none is active, records the user message, then delegates to
        ``Events.submit``.
        """
        text = input_value["text"]
        files = input_value["files"]

        # Check global session message limit
        if state_value.get("total_messages", 0) >= MAX_MESSAGES_PER_CHAT:
            error_msg = f"⚠️ Session message limit reached ({MAX_MESSAGES_PER_CHAT} messages)."
            yield {
                chatbot: gr.update(
                    value=[
                        {
                            "role": "assistant",
                            "content": [{"type": "text", "content": error_msg}],
                            "key": str(uuid.uuid4()),
                        }
                    ]
                ),
                state: gr.update(value=state_value),
            }
            return

        # Create new chat if needed (no conversation active)
        if not state_value["conversation_id"]:
            random_id = str(uuid.uuid4())
            state_value["conversation_id"] = random_id
            state_value["conversation_contexts"][random_id] = {"history": []}
            state_value["conversations"].append(
                {
                    # File-only messages would otherwise get a blank label.
                    "label": (text or "")[:50] or "New Conversation",
                    "key": random_id,
                }
            )

        history = state_value["conversation_contexts"][
            state_value["conversation_id"]
        ]["history"]

        # Increment global message counter (user message + assistant response = 2 messages)
        state_value["total_messages"] = state_value.get("total_messages", 0) + 2

        history.append(
            {
                "key": str(uuid.uuid4()),
                "role": "user",
                "content": [
                    {"type": "file", "content": list(files)},
                    {"type": "text", "content": text},
                ],
            }
        )

        yield Events.preprocess_submit(clear_input=True)(state_value)
        try:
            for chunk in Events.submit(state_value):
                yield chunk
        finally:
            # Always re-enable the controls, even when generation failed.
            yield Events.postprocess_submit(state_value)

    @staticmethod
    def preprocess_submit(clear_input=True):
        """Return a handler that locks the UI while a reply is generated.

        The handler disables message actions, the clear/new/delete controls
        and every conversation except the active one. When ``clear_input`` is
        true it also empties the input box and puts it into loading state.
        """

        def handler(state_value):
            active_id = state_value["conversation_id"]
            history = state_value["conversation_contexts"][active_id]["history"]
            input_update = (
                {input: gr.update(value=None, loading=True)} if clear_input else {}
            )
            return {
                **input_update,
                conversations: gr.update(
                    active_key=active_id,
                    items=[
                        {**item, "disabled": item["key"] != active_id}
                        for item in state_value["conversations"]
                    ],
                ),
                add_conversation_btn: gr.update(disabled=True),
                clear_btn: gr.update(disabled=True),
                conversation_delete_menu_item: gr.update(disabled=True),
                chatbot: gr.update(
                    value=history,
                    bot_config=bot_config(
                        disabled_actions=["edit", "retry", "delete"],
                        model_key=state_value.get("model_key", DEFAULT_MODEL_KEY),
                    ),
                    user_config=user_config(disabled_actions=["edit", "delete"]),
                ),
                state: gr.update(value=state_value),
            }

        return handler

    @staticmethod
    def postprocess_submit(state_value):
        """Return the update dict that unlocks the UI after generation."""
        history = state_value["conversation_contexts"][
            state_value["conversation_id"]
        ]["history"]
        mk = state_value.get("model_key", DEFAULT_MODEL_KEY)
        return {
            input: gr.update(loading=False),
            conversation_delete_menu_item: gr.update(disabled=False),
            clear_btn: gr.update(disabled=False),
            conversations: gr.update(items=state_value["conversations"]),
            add_conversation_btn: gr.update(disabled=False),
            chatbot: gr.update(
                value=history,
                bot_config=bot_config(model_key=mk),
                user_config=user_config(),
            ),
            state: gr.update(value=state_value),
        }

    @staticmethod
    def cancel(state_value):
        """Mark the in-flight reply as stopped and unlock the UI."""
        history = state_value["conversation_contexts"][
            state_value["conversation_id"]
        ]["history"]
        if history:  # nothing to mark when the history is empty
            history[-1]["loading"] = False
            history[-1]["status"] = "done"
            history[-1]["footer"] = "Chat completion paused"
        return Events.postprocess_submit(state_value)

    @staticmethod
    def delete_message(state_value, e: gr.EventData):
        """Remove one message from the active conversation.

        Each deleted message lowers the session counter by exactly one.
        ``add_message`` counts a user/assistant pair as two, and deleting a
        user message does not remove its reply, so both roles are treated
        alike here.
        """
        index = e._data["payload"][0]["index"]
        history = state_value["conversation_contexts"][
            state_value["conversation_id"]
        ]["history"]

        if 0 <= index < len(history):
            state_value["total_messages"] = max(
                0, state_value.get("total_messages", 0) - 1
            )
            history = history[:index] + history[index + 1 :]
            state_value["conversation_contexts"][
                state_value["conversation_id"]
            ]["history"] = history

        return gr.update(value=state_value)

    @staticmethod
    def edit_message(state_value, chatbot_value, e: gr.EventData):
        """Copy an edited message from the chatbot value into the history.

        The first edit of a message also flags it and appends "Edited" to its
        footer.
        """
        index = e._data["payload"][0]["index"]
        history = state_value["conversation_contexts"][
            state_value["conversation_id"]
        ]["history"]
        message = history[index]
        message["content"] = chatbot_value[index]["content"]
        if not message.get("edited"):
            message["edited"] = True
            footer = message.get("footer", "") or ""
            message["footer"] = (footer + " " if footer else "") + "Edited"
        return gr.update(value=state_value), gr.update(value=history)

    @staticmethod
    def regenerate_message(state_value, e: gr.EventData):
        """Drop the selected message and everything after it, then re-submit."""
        index = e._data["payload"][0]["index"]
        conversation = state_value["conversation_contexts"][
            state_value["conversation_id"]
        ]
        conversation["history"] = conversation["history"][:index]

        yield Events.preprocess_submit()(state_value)
        try:
            for chunk in Events.submit(state_value):
                yield chunk
        finally:
            # Always re-enable the controls, even when generation failed.
            yield Events.postprocess_submit(state_value)

    @staticmethod
    def apply_prompt(e: gr.EventData, input_value):
        """Fill the input box from the selected welcome prompt."""
        prompt = e._data["payload"][0]["value"]
        input_value["text"] = prompt["description"]
        input_value["files"] = prompt.get("urls", [])
        return gr.update(value=input_value)

    @staticmethod
    def new_chat(state_value):
        """Deselect the active conversation so the next message starts a new one."""
        if not state_value["conversation_id"]:
            return gr.skip()
        state_value["conversation_id"] = ""
        return (
            gr.update(active_key=state_value["conversation_id"]),
            gr.update(value=None),
            gr.update(value=state_value),
        )

    @staticmethod
    def select_conversation(state_value, e: gr.EventData):
        """Switch to the conversation chosen in the sidebar."""
        active_key = e._data["payload"][0]
        if state_value["conversation_id"] == active_key or (
            active_key not in state_value["conversation_contexts"]
        ):
            return gr.skip()
        state_value["conversation_id"] = active_key
        return (
            gr.update(active_key=active_key),
            gr.update(
                value=state_value["conversation_contexts"][active_key]["history"]
            ),
            gr.update(value=state_value),
        )

    @staticmethod
    def click_conversation_menu(state_value, e: gr.EventData):
        """Handle a conversation context-menu click (only "delete" is defined)."""
        conversation_id = e._data["payload"][0]["key"]
        operation = e._data["payload"][1]["key"]
        if operation == "delete":
            # pop() tolerates a conversation that has already been removed.
            state_value["conversation_contexts"].pop(conversation_id, None)
            state_value["conversations"] = [
                item
                for item in state_value["conversations"]
                if item["key"] != conversation_id
            ]
            if state_value["conversation_id"] == conversation_id:
                state_value["conversation_id"] = ""
                return (
                    gr.update(
                        items=state_value["conversations"],
                        active_key=state_value["conversation_id"],
                    ),
                    gr.update(value=None),
                    gr.update(value=state_value),
                )
        return (
            gr.update(items=state_value["conversations"]),
            gr.skip(),
            gr.update(value=state_value),
        )

    @staticmethod
    def clear_conversation_history(state_value):
        """Empty the history of the active conversation."""
        if not state_value["conversation_id"]:
            return gr.skip()
        state_value["conversation_contexts"][
            state_value["conversation_id"]
        ]["history"] = []
        return gr.update(value=None), gr.update(value=state_value)

    @staticmethod
    def change_model(state_value, e: gr.EventData):
        """Store the selected model and refresh the welcome/bot configuration."""
        model_key = e._data["payload"][0]
        if model_key not in MODELS:
            # Ignore unknown keys rather than storing a value that would
            # fail later in MODELS[model_key].
            return gr.skip(), gr.skip()
        state_value["model_key"] = model_key
        return (
            gr.update(
                welcome_config=welcome_config(model_key),
                bot_config=bot_config(model_key=model_key),
            ),
            gr.update(value=state_value),
        )


# ---------------------------------------------------------------------------
# CSS
# ---------------------------------------------------------------------------

css = """
.gradio-container {
  padding: 0 !important;
}

.gradio-container > main.fillable {
  padding: 0 !important;
}

#chatbot {
  height: calc(100vh - 70px);
  max-height: 1500px;
}

#chatbot .chatbot-conversations {
  height: 100vh;
  background-color: var(--ms-gr-ant-color-bg-layout);
  padding-left: 4px;
  padding-right: 4px;
}

#chatbot .chatbot-conversations .chatbot-conversations-list {
  padding-left: 0;
  padding-right: 0;
}

#chatbot .chatbot-chat {
  padding: 32px;
  padding-bottom: 16px;
  height: 100%;
}

@media (max-width: 768px) {
  #chatbot .chatbot-chat {
    padding: 10px;
    padding-bottom: 10px;
  }
}

#chatbot .chatbot-chat .chatbot-chat-messages {
  flex: 1;
}

.powered-by-bar {
  display: flex;
  align-items: center;
  justify-content: space-between;
  padding: 6px 16px;
  border-bottom: 1px solid var(--ms-gr-ant-color-border-secondary, #f0f0f0);
}

.powered-by-bar .bar-title {
  font-size: 24px;
  font-weight: 600;
  color: #333;
}

.powered-by-bar .bar-right {
  display: flex;
  align-items: center;
  gap: 6px;
}

.powered-by-bar .bar-right span {
  font-size: 11px;
  color: #999;
  text-transform: uppercase;
  letter-spacing: 0.04em;
}

.powered-by-bar img {
  height: 18px;
  opacity: 0.85;
}
"""

# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------

with gr.Blocks(css=css, fill_width=True) as demo:
    state = gr.State(
        {
            "conversation_contexts": {},
            "conversations": [],
            "conversation_id": "",
            "model_key": DEFAULT_MODEL_KEY,
            "total_messages": 0,
        }
    )

    with ms.Application(), antdx.XProvider(theme=DEFAULT_THEME), ms.AutoLoading():
        # Check for API key and show warning if missing
        api_key = os.environ.get("FIREWORKS_API_KEY", "")
        if not api_key:
            antd.Alert(
                message="API Key Required",
                description="Please set the FIREWORKS_API_KEY environment variable to use this app. The chat will not work without it.",
                type="error",
                show_icon=True,
                banner=True,
                closable=False,
            )

        # Top bar: title left, powered-by right
        with ms.Div(elem_classes="powered-by-bar"):
            ms.Span("Welcome to FW Vision Studio", elem_classes="bar-title")
            with ms.Div(elem_classes="bar-right"):
                ms.Span("Powered by")
                antd.Image(
                    "./assets/fireworks_logo.png",
                    preview=False,
                    alt="Fireworks AI",
                    height=30,
                )

        with antd.Row(gutter=[20, 20], wrap=False, elem_id="chatbot"):
            # --- Left sidebar ---
            with antd.Col(
                md=dict(flex="0 0 260px", span=24, order=0),
                span=0,
                order=1,
                elem_style=dict(width=0),
            ):
                with ms.Div(elem_classes="chatbot-conversations"):
                    with antd.Flex(
                        vertical=True, gap="small", elem_style=dict(height="100%")
                    ):
                        # Model selector
                        model_select = antd.Select(
                            value=DEFAULT_MODEL_KEY,
                            options=[
                                {"label": m["label"], "value": k}
                                for k, m in MODELS.items()
                            ],
                            elem_style=dict(width="100%"),
                        )

                        # New conversation button
                        with antd.Button(
                            value=None, color="primary", variant="filled", block=True
                        ) as add_conversation_btn:
                            ms.Text("New Conversation")
                            with ms.Slot("icon"):
                                antd.Icon("PlusOutlined")

                        # Conversations list
                        with antdx.Conversations(
                            elem_classes="chatbot-conversations-list",
                        ) as conversations:
                            with ms.Slot("menu.items"):
                                with antd.Menu.Item(
                                    label="Delete", key="delete", danger=True
                                ) as conversation_delete_menu_item:
                                    with ms.Slot("icon"):
                                        antd.Icon("DeleteOutlined")

            # --- Right: chat area ---
            with antd.Col(flex=1):
                with antd.Flex(
                    vertical=True, gap="small", elem_classes="chatbot-chat"
                ):
                    chatbot = pro.Chatbot(
                        elem_classes="chatbot-chat-messages",
                        height=0,
                        markdown_config=markdown_config(),
                        welcome_config=welcome_config(),
                        user_config=user_config(),
                        bot_config=bot_config(),
                    )

                    # NOTE: ``input`` shadows the builtin, but the event
                    # handlers above refer to the component by this name.
                    with pro.MultimodalInput(
                        placeholder="Ask me to analyze a document or convert a design to code...",
                        upload_config=upload_config(),
                    ) as input:
                        with ms.Slot("prefix"):
                            with antd.Flex(
                                gap=4,
                                wrap=True,
                                elem_style=dict(
                                    maxWidth="40vw", display="inline-flex"
                                ),
                            ):
                                with antd.Button(value=None, type="text") as clear_btn:
                                    with ms.Slot("icon"):
                                        antd.Icon("ClearOutlined")

    # --- Event wiring ---

    # Model selector
    model_select.change(
        fn=Events.change_model,
        inputs=[state],
        outputs=[chatbot, state],
    )

    # Conversations
    add_conversation_btn.click(
        fn=Events.new_chat,
        inputs=[state],
        outputs=[conversations, chatbot, state],
    )
    conversations.active_change(
        fn=Events.select_conversation,
        inputs=[state],
        outputs=[conversations, chatbot, state],
    )
    conversations.menu_click(
        fn=Events.click_conversation_menu,
        inputs=[state],
        outputs=[conversations, chatbot, state],
    )

    # Chatbot
    chatbot.welcome_prompt_select(
        fn=Events.apply_prompt, inputs=[input], outputs=[input]
    )
    chatbot.delete(fn=Events.delete_message, inputs=[state], outputs=[state])
    chatbot.edit(
        fn=Events.edit_message,
        inputs=[state, chatbot],
        outputs=[state, chatbot],
    )
    regenerating_event = chatbot.retry(
        fn=Events.regenerate_message,
        inputs=[state],
        outputs=[
            input,
            clear_btn,
            conversation_delete_menu_item,
            add_conversation_btn,
            conversations,
            chatbot,
            state,
        ],
    )

    # Input
    submit_event = input.submit(
        fn=Events.add_message,
        inputs=[input, state],
        outputs=[
            input,
            clear_btn,
            conversation_delete_menu_item,
            add_conversation_btn,
            conversations,
            chatbot,
            state,
        ],
    )
    input.cancel(
        fn=Events.cancel,
        inputs=[state],
        outputs=[
            input,
            conversation_delete_menu_item,
            clear_btn,
            conversations,
            add_conversation_btn,
            chatbot,
            state,
        ],
        cancels=[submit_event, regenerating_event],
        queue=False,
    )
    clear_btn.click(
        fn=Events.clear_conversation_history,
        inputs=[state],
        outputs=[chatbot, state],
    )

if __name__ == "__main__":
    demo.queue(default_concurrency_limit=100).launch(
        server_name="0.0.0.0",
        server_port=7860,
        ssr_mode=False,
        max_threads=100,
    )