# Qianfan-VL / app.py
# (Hugging Face Space metadata: last updated by Azure99, commit 649b351, verified)
import base64
import mimetypes
import os
from pathlib import Path
from typing import Any, Dict, List
import gradio as gr
from openai import OpenAI
# Model served by default; override via the DEFAULT_MODEL environment variable.
DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "qianfan-vl-70b")
# OpenAI-compatible client pointed at Baidu Qianfan's v2 endpoint.
# NOTE(review): an empty api_key lets the app start without credentials;
# requests will then fail at call time — confirm this fallback is intended.
_client = OpenAI(
    base_url="https://qianfan.baidubce.com/v2",
    api_key=os.getenv("QIANFAN_API_KEY", ""),
)
def _data_url(path: str) -> str:
mime, _ = mimetypes.guess_type(path)
mime = mime or "application/octet-stream"
data = base64.b64encode(Path(path).read_bytes()).decode("utf-8")
return f"data:{mime};base64,{data}"
def _image_content(path: str) -> Dict[str, Any]:
    """Wrap the image file at *path* as an OpenAI ``image_url`` content part."""
    url = _data_url(path)
    return {"type": "image_url", "image_url": {"url": url}}
def _text_content(text: str) -> Dict[str, Any]:
return {"type": "text", "text": text}
def _message(role: str, content: Any) -> Dict[str, Any]:
return {"role": role, "content": content}
def _build_user_message(message: Dict[str, Any]) -> Dict[str, Any]:
    """Convert a Gradio MultimodalTextbox payload into one user message.

    Image parts (one per uploaded file) come first, followed by the
    stripped text when it is non-empty.
    """
    attachments = message.get("files") or []
    prompt = (message.get("text") or "").strip()
    parts: List[Dict[str, Any]] = []
    for file_path in attachments:
        parts.append(_image_content(file_path))
    if prompt:
        parts.append(_text_content(prompt))
    return _message("user", parts)
def _convert_history(history: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
msgs: List[Dict[str, Any]] = []
user_content: List[Dict[str, Any]] = []
for turn in history or []:
role, content = turn.get("role"), turn.get("content")
if role == "user":
if isinstance(content, str):
user_content.append(_text_content(content))
elif isinstance(content, tuple):
user_content.extend(_image_content(path) for path in content if path)
elif role == "assistant":
msgs.append(_message("user", user_content.copy()))
user_content.clear()
msgs.append(_message("assistant", content))
return msgs
def _add_image_tokens(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
new_messages = []
for message in messages:
if message.get("role") != "user":
new_messages.append(message)
continue
content = message.get("content", [])
if not isinstance(content, list):
new_messages.append(message)
continue
image_count = sum(1 for item in content if item.get("type") == "image_url")
image_tokens = "<image>" * image_count
new_content = []
for item in content:
if item.get("type") == "image_url":
new_content.append(item)
elif item.get("type") == "text":
text = image_tokens + item.get("text", "")
new_content.append(_text_content(text))
new_messages.append(_message("user", new_content))
return new_messages
def stream_response(message: Dict[str, Any], history: List[Dict[str, Any]], model_name: str = DEFAULT_MODEL):
    """Stream an assistant reply for the latest user *message*.

    Builds the full OpenAI-style conversation from Gradio *history* plus
    the new message, inserts ``<image>`` placeholders, then yields the
    accumulated reply text after every streamed delta.  Any failure
    (request or mid-stream) is surfaced as a final error string.
    """
    conversation = _convert_history(history)
    conversation.append(_build_user_message(message))
    conversation = _add_image_tokens(conversation)
    try:
        response = _client.chat.completions.create(
            model=model_name,
            messages=conversation,
            # Near-zero temperature for effectively deterministic output.
            temperature=0.000001,
            top_p=1,
            extra_body={
                "repetition_penalty": 1.05,
                "frequency_penalty": 0,
                "presence_penalty": 0,
            },
            stream=True,
        )
        accumulated = ""
        for chunk in response:
            piece = chunk.choices[0].delta.content
            if piece:
                accumulated += piece
                yield accumulated
    except Exception as e:
        yield f"Failed to get response: {e}"
def build_demo() -> gr.Blocks:
    """Assemble the Gradio chat UI and return it ready to launch."""
    chat_display = gr.Chatbot(type="messages", allow_tags=["think"])
    input_box = gr.MultimodalTextbox(
        show_label=False,
        placeholder="Enter text, or upload one or more images...",
        file_types=["image"],
        file_count="multiple",
        max_plain_text_length=32768,
    )
    model_dropdown = gr.Dropdown(
        label="Model",
        choices=[
            ("Qianfan-VL-70B", "qianfan-vl-70b"),
            ("Qianfan-VL-8B", "qianfan-vl-8b"),
        ],
        value=DEFAULT_MODEL,
    )
    demo = gr.ChatInterface(
        fn=stream_response,
        type="messages",
        multimodal=True,
        chatbot=chat_display,
        textbox=input_box,
        title="Qianfan-VL: Domain-Enhanced Vision-Language Models",
        description="""**Qianfan-VL** is a series of general-purpose multimodal large language models enhanced for enterprise-level multimodal applications. The models offer deep optimization for high-frequency scenarios in industrial deployment while maintaining strong general capabilities.
🔗 **Links**: [GitHub](https://github.com/baidubce/Qianfan-VL) | [HuggingFace](https://huggingface.co/baidu) | [ModelScope](https://modelscope.cn/organization/baidu-qianfan) | [Documentation](https://github.com/baidubce/qianfan-models-cookbook)""",
        additional_inputs=[model_dropdown],
        additional_inputs_accordion=gr.Accordion("Options", open=True),
    )
    return demo.queue(default_concurrency_limit=8)
def main():
    """Entry point: build the demo app and serve it."""
    demo = build_demo()
    demo.launch()
# Launch the Gradio server only when executed as a script, not on import.
if __name__ == "__main__":
    main()