Spaces:
Running
on
Zero
Running
on
Zero
| import gradio as gr | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
| import spaces | |
| from PIL import Image | |
| import io | |
# Model registry keyed by Hugging Face repo id. Every entry is loaded once at
# import time, moved to the GPU and put in eval mode.
# NOTE: trust_remote_code=True lets the model repository run its own Python
# code during loading, so only list repositories you trust.
models = {
    repo_id: AutoModelForCausalLM.from_pretrained(repo_id, trust_remote_code=True)
    .to("cuda")
    .eval()
    for repo_id in ("jinaai/reader-lm-0.5b",)
}
# Tokenizer registry; keys mirror the ones used in `models` so both can be
# looked up with the same model id.
tokenizers = {
    repo_id: AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
    for repo_id in ("jinaai/reader-lm-0.5b",)
}
# Request a ZeroGPU device for the duration of each call. The decorator is
# documented as effect-free outside ZeroGPU environments.
@spaces.GPU
def run_example(html_content: str, model_id: str = "jinaai/reader-lm-0.5b") -> str:
    """Convert an HTML string to Markdown with the selected reader model.

    Args:
        html_content: Raw HTML to convert. It is sent to the model as a single
            user chat message.
        model_id: Key into the module-level ``models`` / ``tokenizers``
            registries.

    Returns:
        The decoded first sequence returned by ``model.generate``, or an empty
        string when ``html_content`` is empty or whitespace-only.
        NOTE(review): special tokens are not stripped and, for a causal LM,
        the decoded sequence normally starts with the echoed prompt — confirm
        whether the UI should show only the newly generated part.

    Raises:
        KeyError: If ``model_id`` is not a registered model.
    """
    # Nothing to convert: skip tokenization and generation entirely.
    if not html_content or not html_content.strip():
        return ""

    model = models[model_id]
    tokenizer = tokenizers[model_id]

    messages = [{"role": "user", "content": html_content}]
    input_text = tokenizer.apply_chat_template(messages, tokenize=False)

    # The original referenced an undefined `device` name (NameError on every
    # call); place the inputs on whatever device the model lives on instead.
    inputs = tokenizer.encode(input_text, return_tensors="pt").to(model.device)

    # Greedy decoding. `temperature` has no effect when do_sample=False, so it
    # is not passed (passing it only triggers a transformers warning).
    outputs = model.generate(
        inputs,
        max_new_tokens=1024,
        do_sample=False,
        repetition_penalty=1.08,
    )
    return tokenizer.decode(outputs[0])
# Custom stylesheet handed to gr.Blocks: gives the element whose id is
# "output" a fixed 500px height, scrolling on overflow and a thin grey border.
css = """
#output {
height: 500px;
overflow: auto;
border: 1px solid #ccc;
}
"""
# Build the UI: a single tab with the model selector, HTML input and submit
# button on the left, and the generated Markdown on the right.
with gr.Blocks(css=css) as demo:
    gr.Markdown("\n# HTML-to-Markdown\n")
    with gr.Tab(label="Main"):
        with gr.Row():
            with gr.Column():
                model_selector = gr.Dropdown(
                    choices=list(models),
                    label="Model",
                    # The default must be one of the choices. The previous
                    # hard-coded default ("maxiw/Florence-2-ScreenQA-base")
                    # was not a key of `models`, so submitting without
                    # touching the dropdown failed with a KeyError.
                    value=next(iter(models)),
                )
                html_content = gr.Textbox(label="HTML")
                submit_btn = gr.Button(value="Submit")
            with gr.Column():
                output_text = gr.Textbox(label="Markdown")

        # Event listeners have to be registered inside the Blocks context.
        submit_btn.click(run_example, [html_content, model_selector], [output_text])

demo.launch(debug=True)