Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from transformers import pipeline | |
# Hugging Face Hub identifier of the JinaAI ReaderLM-v2 checkpoint.
model_name = "jinaai/ReaderLM-v2"

# Text-generation pipeline built once at module level; convert_html() calls it.
html_converter = pipeline(task="text-generation", model=model_name)
def convert_html(html_input: str, output_format: str, max_new_tokens: int = 500) -> str:
    """Convert raw HTML into the requested format with the text-generation model.

    Args:
        html_input: Raw HTML text to convert.
        output_format: Name of the target format; it is interpolated verbatim
            into the prompt (the UI offers "Markdown" and "JSON").
        max_new_tokens: Upper bound on the number of tokens the model may
            generate. The prompt length does not count against this budget.

    Returns:
        The generated text with surrounding whitespace stripped, or an empty
        string when ``html_input`` is empty or contains only whitespace.
    """
    # Nothing to convert: skip the (expensive) model call entirely.
    if not html_input or not html_input.strip():
        return ""

    prompt = f"Convert the following HTML into {output_format}:\n\n{html_input}"

    # max_new_tokens (not max_length) so a long HTML prompt cannot use up the
    # whole generation budget; return_full_text=False makes the pipeline return
    # only the continuation, so the prompt never has to be stripped by hand.
    response = html_converter(
        prompt,
        max_new_tokens=max_new_tokens,
        num_return_sequences=1,
        return_full_text=False,
    )
    return response[0]["generated_text"].strip()
# --- Gradio UI (NoCrypt/miku theme) ---

# Input widgets, in the order convert_html() receives them.
_html_box = gr.Textbox(
    lines=10,
    placeholder="Paste your raw HTML here...",
    label="Raw HTML Input",
)
_format_radio = gr.Radio(["Markdown", "JSON"], label="Output Format", value="Markdown")

# Output widget showing the converted text.
_result_box = gr.Textbox(lines=10, label="Converted Output")

# Example rows: [html_input, output_format].
_example_rows = [
    ["<h1>Hello World</h1><p>This is a <strong>test</strong>.</p>", "Markdown"],
    ["<ul><li>Item 1</li><li>Item 2</li></ul>", "JSON"],
]

interface = gr.Interface(
    fn=convert_html,
    inputs=[_html_box, _format_radio],
    outputs=_result_box,
    title="HTML to Markdown/JSON Converter",
    description="Convert raw HTML into beautifully formatted Markdown or JSON using JinaAI ReaderLM-v2.",
    theme="NoCrypt/miku",
    examples=_example_rows,
)

# Start serving the interface.
interface.launch()