| <!DOCTYPE html> |
| <html> |
| <head> |
| <!-- Page metadata: encoding, responsive viewport, title and description. --> |
| <meta charset="utf-8"> |
| <meta name="viewport" content="width=device-width, initial-scale=1"> |
| <title>Gradio-Lite: Serverless Gradio Running Entirely in Your Browser</title> |
| <meta name="description" content="Gradio-Lite: Serverless Gradio Running Entirely in Your Browser"> |
|
|
| <!-- Gradio-Lite script and stylesheet, both loaded from the jsDelivr CDN. --> |
| <!-- NOTE(review): the URLs carry no version, so the CDN presumably serves its default release; consider pinning one. --> |
| <script type="module" crossorigin src="https://cdn.jsdelivr.net/npm/@gradio/lite/dist/lite.js"></script> |
| <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@gradio/lite/dist/lite.css" /> |
|
|
| <!-- Remove margin and padding on html/body and make them span the full height. --> |
| <style> |
| html, body { |
| margin: 0; |
| padding: 0; |
| height: 100%; |
| } |
| </style> |
| </head> |
| <body> |
| <gradio-lite> |
| <gradio-file name="app.py" entrypoint> |
| import gradio as gr |
| from transformers_js_py import pipeline |
|
|
| generator = await pipeline( |
| "text-generation", |
| "onnx-community/Qwen2.5-0.5B-Instruct", |
| { "dtype": "q4", "device": "webgpu" } |
| ) |
|
|
| async def chat_response(message, history): |
| messages = [ |
| { "role": "system", "content": "You are a great assistant." }, |
| { "role": "user", "content": message } |
| ] |
|
|
| output = await generator(messages, { |
| "max_new_tokens": 256, |
| "do_sample": True, |
| "temperature": 0.3, |
| }) |
| response = output[0]["generated_text"][-1]["content"] |
| return response |
|
|
| demo = gr.ChatInterface(chat_response, type="messages", autofocus=False) |
|
|
| demo.launch() |
| </gradio-file> |
|
|
| <gradio-requirements> |
| transformers-js-py |
| </gradio-requirements> |
| </gradio-lite> |
| </body> |
| </html> |