Carkham commited on
Commit
0373e86
·
verified ·
1 Parent(s): 48d7965

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ assets/example/llm-raw-the-eye-o.O-1995_2418.pdf_1.jpg filter=lfs diff=lfs merge=lfs -text
37
+ assets/example/docstructbench_llm-raw-scihub-o.O-ijc.22994.pdf_3_5.png filter=lfs diff=lfs merge=lfs -text
38
+ assets/example/table_photo_chn_35.png filter=lfs diff=lfs merge=lfs -text
39
+ assets/example/table_photo_eng_23.png filter=lfs diff=lfs merge=lfs -text
40
+ assets/example/table_scan_chn_1.png filter=lfs diff=lfs merge=lfs -text
41
+ assets/example/table_scan_chn_37.png filter=lfs diff=lfs merge=lfs -text
42
+ assets/example/table_scan_eng_12.png filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,12 +1,16 @@
1
  ---
2
- title: TRivia 3B
3
- emoji: 🔥
4
- colorFrom: indigo
5
- colorTo: indigo
6
  sdk: gradio
7
- sdk_version: 6.0.2
8
  app_file: app.py
9
  pinned: false
 
 
 
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: TRivia-3B
3
+ emoji: 🚀
4
+ colorFrom: purple
5
+ colorTo: green
6
  sdk: gradio
7
+ sdk_version: 5.9.1
8
  app_file: app.py
9
  pinned: false
10
+ license: apache-2.0
11
+ short_description: Demo for TRivia
12
+ models:
13
+ - opendatalab/TRivia-3B
14
  ---
15
 
16
+ https://arxiv.org/abs/2512.01248
app.py ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ os.environ["GRADIO_TEMP_DIR"] = "./tmp"
3
+
4
+ import time
5
+ import torch
6
+ import spaces
7
+ import tempfile
8
+ import sys
9
+ import gradio as gr
10
+ from io import StringIO
11
+ from contextlib import contextmanager
12
+ from threading import Thread
13
+ from PIL import Image
14
+ from transformers import (
15
+ AutoProcessor,
16
+ AutoModelForCausalLM,
17
+ AutoModel,
18
+ AutoTokenizer,
19
+ Qwen2_5_VLForConditionalGeneration,
20
+ TextIteratorStreamer
21
+ )
22
+ from huggingface_hub import snapshot_download
23
+ from qwen_vl_utils import process_vision_info
24
+ from otsl_utils import convert_otsl_to_html
25
+
# == download weights ==
# model_dir = snapshot_download('opendatalab/TRivia-3B', local_dir='./models/TRivia-3B')

# == select device ==
# Inputs are moved to this device before generation; the model itself is
# placed by `device_map="auto"` below.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load TRivia-3B (a Qwen2.5-VL model fine-tuned for table recognition).
# On failure the app keeps running in a degraded state (model=None).
try:
    MODEL_ID = "opendatalab/TRivia-3B"
    processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
    model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
        MODEL_ID,
        trust_remote_code=True,
        torch_dtype=torch.float16,
        device_map="auto"
    ).eval()
    print("✓ TRivia-3B loaded")
except Exception as e:
    # Log the failure so it is visible in the Space logs — the original
    # swallowed the exception silently, leaving no trace of why the
    # model is unavailable.
    print(f"✗ Failed to load TRivia-3B: {e}")
    model = None
    processor = None
@spaces.GPU
def recognize_image(image: Image.Image,
                    max_new_tokens: int, temperature: float):
    """Run table recognition on an image, streaming partial results.

    Args:
        image: Input table image (PIL), or None if nothing was uploaded.
        max_new_tokens: Generation budget for the model.
        temperature: Sampling temperature.

    Yields:
        3-tuples ``(otsl_text, html_text, html_text)`` — one value per
        wired Gradio output component [pred_otsl, output_html, rendered_html].
    """
    if image is None:
        # BUGFIX: this handler feeds THREE output components, so every
        # yield must produce three values (the original yielded only two
        # here, which makes Gradio raise a "not enough output values" error).
        msg = "Please upload an image."
        yield msg, msg, msg
        return

    try:
        # Prepare messages in chat format; the image placeholder is bound
        # to the actual image by the processor call below.
        messages = [{
            "role": "user",
            "content": [
                {"type": "text", "text": "You are an AI specialized in recognizing and extracting table from images. Your mission is to analyze the table image and generate the result in OTSL format using specified tags. Output only the results without any other words and explanation."},
                {"type": "image"},
            ]
        }]

        prompt_full = processor.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )

        inputs = processor(
            text=[prompt_full],
            images=[image],
            return_tensors="pt",
            padding=True
        ).to(device)

        # Stream decoded tokens as they are generated so the UI updates live.
        streamer = TextIteratorStreamer(
            processor.tokenizer if hasattr(processor, 'tokenizer') else processor,
            skip_prompt=True,
            skip_special_tokens=True
        )

        generation_kwargs = {
            **inputs,
            "streamer": streamer,
            "max_new_tokens": max_new_tokens,
            "do_sample": True,
            "temperature": temperature,
            "repetition_penalty": 1.05,
        }

        # model.generate blocks until done, so run it on a worker thread and
        # consume the streamer on this one.
        thread = Thread(target=model.generate, kwargs=generation_kwargs)
        thread.start()

        # Stream the results, re-rendering the (partial) OTSL to HTML on
        # every new chunk.
        buffer = ""
        for new_text in streamer:
            buffer += new_text
            # Defensive: strip the chat end-of-turn marker if it leaks through.
            buffer = buffer.replace("<|im_end|>", "")
            html_text = convert_otsl_to_html(buffer)
            time.sleep(0.01)  # small pause keeps UI updates smooth
            yield buffer, html_text, html_text

        # Ensure the generation thread completes before returning.
        thread.join()

    except Exception as e:
        error_msg = f"Error during generation: {str(e)}"
        print(f"Full error: {e}")
        import traceback
        traceback.print_exc()
        yield error_msg, error_msg, error_msg
def gradio_reset():
    """Clear the image input and all three result panes."""
    return tuple(gr.update(value=None) for _ in range(4))
120
+
121
+
if __name__ == "__main__":
    # Static page header (title, paper/code/model links) shown above the demo.
    with open("header.html", "r") as file:
        header = file.read()
    with gr.Blocks() as demo:
        gr.HTML(header)

        with gr.Row():
            # Left column: image input, action buttons, settings, examples.
            with gr.Column():

                input_img = gr.Image(label=" ", interactive=True)
                with gr.Row():
                    clear = gr.Button(value="Clear")
                    predict = gr.Button(value="Table Recognition", interactive=True, variant="primary")

                with gr.Accordion("Advanced Settings", open=False):
                    max_tokens = gr.Slider(
                        minimum=1,
                        maximum=8192,
                        value=4096,
                        step=1,
                        label="Max New Tokens"
                    )
                    temperature = gr.Slider(
                        minimum=0.1,
                        maximum=2.0,
                        value=0.1,
                        step=0.1,
                        label="Temperature"
                    )

                with gr.Accordion("Examples:"):
                    # Bundled sample table images (PNG files only; the .jpg
                    # example shipped in assets is deliberately(?) excluded —
                    # NOTE(review): confirm whether .jpg should be listed too.
                    example_root = os.path.join(os.path.dirname(__file__), "assets", "example")
                    gr.Examples(
                        examples=[os.path.join(example_root, _) for _ in os.listdir(example_root) if
                                  _.endswith("png")],
                        inputs=[input_img],
                    )
            # Right column: streamed outputs (rendered table, HTML, raw OTSL).
            with gr.Column():
                rendered_html = gr.Markdown(label="Rendered HTML:", show_label=True)
                output_html = gr.Textbox(label="Converted HTML:", interactive=False)
                pred_otsl = gr.Textbox(label="Predicted OTSL:", interactive=False)

        # Wiring: clear resets input + all panes; predict streams
        # (otsl, html, html) triples into the three output components.
        clear.click(gradio_reset, inputs=None, outputs=[input_img, pred_otsl, output_html, rendered_html])
        predict.click(recognize_image, inputs=[input_img, max_tokens, temperature], outputs=[pred_otsl, output_html, rendered_html])

    demo.launch(server_name="0.0.0.0", server_port=10041, debug=True)
assets/.DS_Store ADDED
Binary file (6.15 kB). View file
 
assets/example/docstructbench_llm-raw-scihub-o.O-ijc.22994.pdf_3_5.png ADDED

Git LFS Details

  • SHA256: 7aa4d1400999670fa8dd8e4577dbc425cf105ca50aaca326adcdc09e82049aef
  • Pointer size: 131 Bytes
  • Size of remote file: 190 kB
assets/example/table_photo_chn_35.png ADDED

Git LFS Details

  • SHA256: 05edd8d535dd7363b0a72cd6bc4214c09099e7785c0534da7a4f5e556d5e1296
  • Pointer size: 132 Bytes
  • Size of remote file: 1.42 MB
assets/example/table_photo_eng_23.png ADDED

Git LFS Details

  • SHA256: 267480e94337b07eb6e1829496953e81a4615d30ff7fad571d9065e887e3ca63
  • Pointer size: 132 Bytes
  • Size of remote file: 1.49 MB
assets/example/table_scan_chn_1.png ADDED

Git LFS Details

  • SHA256: b28c8c79ce6b0cadfb9ebff3c3691b5a14ad996838ebbcf126b8d1db84f7daa9
  • Pointer size: 131 Bytes
  • Size of remote file: 191 kB
assets/example/table_scan_chn_37.png ADDED

Git LFS Details

  • SHA256: d5d6571e65aa4df696fff7e76770974b1053f2600e1c01f81afb8c4bb6f50feb
  • Pointer size: 132 Bytes
  • Size of remote file: 1.03 MB
assets/example/table_scan_eng_12.png ADDED

Git LFS Details

  • SHA256: 3165c9aa5bb3b3570e97ac03c8057de916d8615e4a386eebf71e712427fce953
  • Pointer size: 131 Bytes
  • Size of remote file: 603 kB
header.html ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <html><head>
2
+ <!-- <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bulma@0.9.3/css/bulma.min.css"> -->
3
+ <link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.15.4/css/all.css">
4
+ <style>
5
+ .link-block {
6
+ border: 1px solid transparent;
7
+ border-radius: 24px;
8
+ background-color: rgba(54, 54, 54, 1);
9
+ cursor: pointer !important;
10
+ }
11
+ .link-block:hover {
12
+ background-color: rgba(54, 54, 54, 0.75) !important;
13
+ cursor: pointer !important;
14
+ }
15
+ .external-link {
16
+ display: inline-flex;
17
+ align-items: center;
18
+ height: 36px;
19
+ line-height: 36px;
20
+ padding: 0 16px;
21
+ cursor: pointer !important;
22
+ }
23
+ .external-link,
24
+ .external-link:hover {
25
+ cursor: pointer !important;
26
+ }
27
+ a {
28
+ text-decoration: none;
29
+ }
30
+ </style></head>
31
+
32
+ <body>
33
+ <div style="
34
+ display: flex;
35
+ flex-direction: column;
36
+ justify-content: center;
37
+ align-items: center;
38
+ text-align: center;
39
+ background: linear-gradient(45deg, #007bff 0%, #0056b3 100%);
40
+ padding: 24px;
41
+ gap: 24px;
42
+ border-radius: 8px;
43
+ ">
44
+ <div style="
45
+ display: flex;
46
+ flex-direction: column;
47
+ align-items: center;
48
+ gap: 16px;
49
+ ">
50
+ <div style="display: flex; flex-direction: column; gap: 8px">
51
+ <h1 style="
52
+ font-size: 48px;
53
+ color: #fafafa;
54
+ margin: 0;
55
+ font-family: 'Trebuchet MS', 'Lucida Sans Unicode',
56
+ 'Lucida Grande', 'Lucida Sans', Arial, sans-serif;
57
+ ">
58
+ TRivia-3B: Demo
59
+ </h1>
60
+ </div>
61
+ </div>
62
+
63
+ <p style="
64
+ margin: 0;
65
+ line-height: 1.6rem;
66
+ font-size: 16px;
67
+ color: #fafafa;
68
+ opacity: 0.8;
69
+ ">
70
+ Self-supervised Fine-tuning of Vision-Language Models for Table Recognition.<br>
71
+ </p>
72
+ <style>
73
+ .link-block {
74
+ display: inline-block;
75
+ }
76
+ .link-block + .link-block {
77
+ margin-left: 20px;
78
+ }
79
+ </style>
80
+
81
+ <div class="column has-text-centered">
82
+ <div class="publication-links">
83
+ <!-- Code Link. -->
84
+ <span class="link-block">
85
+ <a href="https://github.com/opendatalab/TRivia" class="external-link button is-normal is-rounded is-dark" style="text-decoration: none; cursor: pointer">
86
+ <span class="icon" style="margin-right: 4px">
87
+ <i class="fab fa-github" style="color: white; margin-right: 4px"></i>
88
+ </span>
89
+ <span style="color: white">Code</span>
90
+ </a>
91
+ </span>
92
+
93
+ <!-- Model Link. -->
94
+ <span class="link-block">
95
+ <a href="https://huggingface.co/opendatalab/TRivia-3B" class="external-link button is-normal is-rounded is-dark" style="text-decoration: none; cursor: pointer">
96
+ <span class="icon" style="margin-right: 4px">
97
+ <i class="fas fa-archive" style="color: white; margin-right: 4px"></i>
98
+ </span>
99
+ <span style="color: white">Model</span>
100
+ </a>
101
+ </span>
102
+
103
+ <!-- Paper Link. -->
104
+ <span class="link-block">
105
+ <a href="https://arxiv.org/abs/2512.01248" class="external-link button is-normal is-rounded is-dark" style="text-decoration: none; cursor: pointer">
106
+ <span class="icon" style="margin-right: 8px">
107
+ <i class="fas fa-file" style="color: white"></i>
108
+ </span>
109
+ <span style="color: white">Paper</span>
110
+ </a>
111
+ </span>
112
+ </div>
113
+ </div>
114
+
115
+ <!-- New Demo Links -->
116
+ </div>
117
+
118
+
119
+ </body></html>
otsl_utils.py ADDED
@@ -0,0 +1,413 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import itertools
3
+ import html
4
+ from typing import Any, Dict, Final, List, Literal, Optional, Tuple, Union
5
+ from pydantic import (
6
+ AnyUrl,
7
+ BaseModel,
8
+ ConfigDict,
9
+ Field,
10
+ StringConstraints,
11
+ computed_field,
12
+ field_validator,
13
+ model_validator,
14
+ )
15
+
class TableCell(BaseModel):
    """A single table cell: its text plus grid placement/span information."""

    row_span: int = 1
    col_span: int = 1
    start_row_offset_idx: int
    end_row_offset_idx: int
    start_col_offset_idx: int
    end_col_offset_idx: int
    text: str
    column_header: bool = False
    row_header: bool = False
    row_section: bool = False

    @model_validator(mode="before")
    @classmethod
    def from_dict_format(cls, data: Any) -> Any:
        """Normalize dict payloads before validation.

        If the dict already carries a "text" key it is passed through
        unchanged; otherwise the text is assembled from the bbox "token"
        (falling back to concatenating the per-text-cell tokens).
        """
        if isinstance(data, Dict):
            # Check if this is a native BoundingBox or a bbox from docling-ibm-models
            if (
                # "bbox" not in data
                # or data["bbox"] is None
                # or isinstance(data["bbox"], BoundingBox)
                "text"
                in data
            ):
                return data
            # No "text" key: derive the cell text from the bbox token.
            text = data["bbox"].get("token", "")
            if not len(text):
                # Fall back to joining tokens of the individual text cells.
                text_cells = data.pop("text_cell_bboxes", None)
                if text_cells:
                    for el in text_cells:
                        text += el["token"] + " "

            text = text.strip()
            data["text"] = text

        return data
class TableData(BaseModel):  # TBD
    """Tabular data: a flat list of cells plus the table dimensions."""

    table_cells: List[TableCell] = []
    num_rows: int = 0
    num_cols: int = 0

    @computed_field  # type: ignore
    @property
    def grid(
        self,
    ) -> List[List[TableCell]]:
        """Materialize the num_rows x num_cols grid of cells."""
        # Start from a grid of fresh, empty 1x1 cells.
        grid_rows: List[List[TableCell]] = []
        for row in range(self.num_rows):
            current_row = []
            for col in range(self.num_cols):
                current_row.append(
                    TableCell(
                        text="",
                        start_row_offset_idx=row,
                        end_row_offset_idx=row + 1,
                        start_col_offset_idx=col,
                        end_col_offset_idx=col + 1,
                    )
                )
            grid_rows.append(current_row)

        # Stamp each real cell over every grid slot its span covers,
        # clamping spans that run past the declared table size.
        for cell in self.table_cells:
            row_stop = min(cell.end_row_offset_idx, self.num_rows)
            col_stop = min(cell.end_col_offset_idx, self.num_cols)
            for row in range(min(cell.start_row_offset_idx, self.num_rows), row_stop):
                for col in range(min(cell.start_col_offset_idx, self.num_cols), col_stop):
                    grid_rows[row][col] = cell

        return grid_rows
"""
OTSL
"""
# OTSL structural tags (Optimized Table Structure Language):
OTSL_NL = "<nl>"      # end of table row
OTSL_FCEL = "<fcel>"  # full cell (carries content)
OTSL_ECEL = "<ecel>"  # empty cell
OTSL_LCEL = "<lcel>"  # cell merged with the cell to its left
OTSL_UCEL = "<ucel>"  # cell merged with the cell above
OTSL_XCEL = "<xcel>"  # cell merged both left and up (2-D span)

# Compiled once at import time. The capturing group makes re.split() keep
# the tags in the split result.
_OTSL_TOKEN_RE = re.compile(
    "(" + "|".join((OTSL_NL, OTSL_FCEL, OTSL_ECEL, OTSL_LCEL, OTSL_UCEL, OTSL_XCEL)) + ")"
)

def otsl_extract_tokens_and_text(s: str):
    """Split an OTSL string into its tag tokens and interleaved parts.

    Args:
        s: Raw OTSL output (tags plus cell text).

    Returns:
        (tokens, text_parts): ``tokens`` is the list of OTSL tags in order;
        ``text_parts`` is the full token/text sequence with whitespace-only
        fragments removed (tags are kept so cell text can be matched to the
        tag that precedes it).
    """
    tokens = _OTSL_TOKEN_RE.findall(s)
    # Split on the same captured pattern so tags stay in the sequence, then
    # drop empty / whitespace-only fragments.
    text_parts = [part for part in _OTSL_TOKEN_RE.split(s) if part.strip()]
    return tokens, text_parts
def otsl_parse_texts(texts, tokens):
    """Parse an OTSL token/text sequence into TableCell objects.

    Args:
        texts: Interleaved tag/text sequence from
            otsl_extract_tokens_and_text (structural tags plus cell text).
        tokens: The structural tags only.

    Returns:
        (table_cells, split_row_tokens): the parsed cells and the tag
        matrix, one inner list of tags per table row.
    """
    split_word = OTSL_NL
    # Group tags into rows by splitting on <nl>.
    split_row_tokens = [
        list(y)
        for x, y in itertools.groupby(tokens, lambda z: z == split_word)
        if not x
    ]
    table_cells = []
    r_idx = 0
    c_idx = 0

    # Pad the tag matrix so every row has the same number of columns.
    if split_row_tokens:
        # The widest row determines the table's column count.
        max_cols = max(len(row) for row in split_row_tokens)

        # Pad each short row with empty cells up to max_cols.
        for row_idx, row in enumerate(split_row_tokens):
            while len(row) < max_cols:
                row.append(OTSL_ECEL)

        # The padding <ecel> tags must also appear in `texts`, so rebuild
        # the interleaved sequence from the padded matrix, carrying over
        # any cell text that followed each original tag.
        new_texts = []
        text_idx = 0

        for row_idx, row in enumerate(split_row_tokens):
            for col_idx, token in enumerate(row):
                new_texts.append(token)
                # If this tag matches the next original entry, consume it...
                if text_idx < len(texts) and texts[text_idx] == token:
                    text_idx += 1
                    # ...and keep the following entry when it is cell text
                    # (i.e. not another structural tag).
                    if (text_idx < len(texts) and
                        texts[text_idx] not in [OTSL_NL, OTSL_FCEL, OTSL_ECEL, OTSL_LCEL, OTSL_UCEL, OTSL_XCEL]):
                        new_texts.append(texts[text_idx])
                        text_idx += 1

            new_texts.append(OTSL_NL)
            if text_idx < len(texts) and texts[text_idx] == OTSL_NL:
                text_idx += 1

        texts = new_texts

    def count_right(tokens, c_idx, r_idx, which_tokens):
        # Count consecutive tags to the right of (r_idx, c_idx) that
        # continue a horizontal span.
        span = 0
        c_idx_iter = c_idx
        while tokens[r_idx][c_idx_iter] in which_tokens:
            c_idx_iter += 1
            span += 1
            if c_idx_iter >= len(tokens[r_idx]):
                return span
        return span

    def count_down(tokens, c_idx, r_idx, which_tokens):
        # Count consecutive tags below (r_idx, c_idx) that continue a
        # vertical span.
        span = 0
        r_idx_iter = r_idx
        while tokens[r_idx_iter][c_idx] in which_tokens:
            r_idx_iter += 1
            span += 1
            if r_idx_iter >= len(tokens):
                return span
        return span

    # Walk the interleaved sequence, emitting a TableCell for each cell
    # anchor (<fcel>/<ecel>) and tracking the current grid position.
    for i, text in enumerate(texts):
        cell_text = ""
        if text in [
            OTSL_FCEL,
            OTSL_ECEL,
        ]:
            row_span = 1
            col_span = 1
            right_offset = 1
            # A full cell may be followed by its text content.
            if text != OTSL_ECEL and (texts[i + 1] not in [OTSL_NL, OTSL_FCEL, OTSL_ECEL, OTSL_LCEL, OTSL_UCEL, OTSL_XCEL]):
                cell_text = texts[i + 1]
                right_offset = 2

            # Check next element(s) for lcel / ucel / xcel,
            # set properly row_span, col_span
            next_right_cell = ""
            if i + right_offset < len(texts):
                next_right_cell = texts[i + right_offset]

            next_bottom_cell = ""
            if r_idx + 1 < len(split_row_tokens):
                if c_idx < len(split_row_tokens[r_idx + 1]):
                    next_bottom_cell = split_row_tokens[r_idx + 1][c_idx]

            if next_right_cell in [
                OTSL_LCEL,
                OTSL_XCEL,
            ]:
                # we have horizontal spanning cell or 2d spanning cell
                col_span += count_right(
                    split_row_tokens,
                    c_idx + 1,
                    r_idx,
                    [OTSL_LCEL, OTSL_XCEL],
                )
            if next_bottom_cell in [
                OTSL_UCEL,
                OTSL_XCEL,
            ]:
                # we have a vertical spanning cell or 2d spanning cell
                row_span += count_down(
                    split_row_tokens,
                    c_idx,
                    r_idx + 1,
                    [OTSL_UCEL, OTSL_XCEL],
                )

            table_cells.append(
                TableCell(
                    text=cell_text.strip(),
                    row_span=row_span,
                    col_span=col_span,
                    start_row_offset_idx=r_idx,
                    end_row_offset_idx=r_idx + row_span,
                    start_col_offset_idx=c_idx,
                    end_col_offset_idx=c_idx + col_span,
                )
            )
        # Any cell tag advances the column; <nl> starts a new row.
        if text in [
            OTSL_FCEL,
            OTSL_ECEL,
            OTSL_LCEL,
            OTSL_UCEL,
            OTSL_XCEL,
        ]:
            c_idx += 1
        if text == OTSL_NL:
            r_idx += 1
            c_idx = 0
    return table_cells, split_row_tokens
def export_to_html(table_data: TableData) -> str:
    """Render a TableData as an HTML ``<table>`` string.

    Spanning cells emit rowspan/colspan attributes and are written only at
    their anchor (top-left) grid position.

    Args:
        table_data: Parsed table with dimensions and cells.

    Returns:
        The HTML string, or "" when the table has no cells.

    NOTE(review): cell text is inserted without HTML-escaping (the escape
    call was deliberately commented out upstream), so markup characters in
    cell content pass through verbatim — confirm this is intended before
    feeding untrusted text here.
    """
    nrows = table_data.num_rows
    ncols = table_data.num_cols

    if not table_data.table_cells:
        return ""

    current_grid = table_data.grid

    html_str_list = []

    for i in range(nrows):
        html_str_list.append("<tr>")
        for j in range(ncols):
            cell: TableCell = current_grid[i][j]

            # A spanning cell occupies several grid slots; emit it only
            # once, at its top-left position.
            if cell.start_row_offset_idx != i or cell.start_col_offset_idx != j:
                continue

            content = cell.text.strip()
            cell_tag_name = "th" if cell.column_header else "td"

            opening_tag_parts = [f"<{cell_tag_name}"]
            if cell.row_span > 1:
                opening_tag_parts.append(f' rowspan="{cell.row_span}"')
            if cell.col_span > 1:
                opening_tag_parts.append(f' colspan="{cell.col_span}"')
            opening_tag_parts.append(">")
            opening_tag = "".join(opening_tag_parts)

            html_str_list.append(f"{opening_tag}{content}</{cell_tag_name}>")
        html_str_list.append("</tr>")

    body_content = "".join(html_str_list)
    return f"<table>{body_content}</table>"
def convert_otsl_to_html(otsl_content: str) -> str:
    """Convert an OTSL string (possibly still streaming/partial) to HTML."""
    tokens, mixed_texts = otsl_extract_tokens_and_text(otsl_content)

    table_cells, split_row_tokens = otsl_parse_texts(mixed_texts, tokens)

    # Table width is the widest row; zero rows means an empty table.
    num_cols = max(len(row) for row in split_row_tokens) if split_row_tokens else 0
    table_data = TableData(
        num_rows=len(split_row_tokens),
        num_cols=num_cols,
        table_cells=table_cells,
    )

    return export_to_html(table_data)
if __name__ == "__main__":
    # Smoke test: a real multi-row OTSL sample with horizontal (<lcel>) and
    # vertical (<ucel>) spans. (Removed: an unused `import time` and an
    # unused second fixture string.)
    b = """<fcel>Reviewer<fcel>Representation<fcel>Consultant<fcel>Speaker's Bureau<fcel>Ownership/ Partnership/ Principal<fcel>Personal Research<fcel>Institutional, Organizational, or Other Financial Benefit<fcel>Expert Witness<nl>
<fcel>John E. Brush<fcel>Official Reviewer–ACCF Board of Trustees<fcel>● United Healthcare<fcel>None<fcel>None<fcel>None<fcel>● PROMETHEUS Payment (Board member)<fcel>None<nl>
<fcel>David P. Faxon<fcel>Official Reviewer–AHA<fcel>● Johnson & Johnson<fcel>None<fcel>● CULPRIT Trial (PI)*<fcel>None<fcel>● Circulation: Cardiovascular Interventions—Editor*<fcel>None<nl>
<ucel><ucel><ucel><ucel><fcel>● RIVA Medical<ucel><ucel><ucel><nl>
<fcel>Robert A. Harrington<fcel>Official Reviewer–AHA<fcel>● AstraZeneca*<fcel>None<fcel>None<fcel>● AstraZeneca<fcel>None<fcel>None<nl>
<ucel><ucel><fcel>● Baxter<ucel><ucel><fcel>● Baxter<ucel><ucel><nl>
<ucel><ucel><fcel>● CSL Behring<ucel><ucel><fcel>● Bristol-Myers Squibb*<ucel><ucel><nl>
<ucel><ucel><fcel>● Eli Lilly<ucel><ucel><fcel>● GlaxoSmithKline<ucel><ucel><nl>
<ucel><ucel><fcel>● Luiypold<ucel><ucel><fcel>● The Medicines Company<ucel><ucel><nl>
<ucel><ucel><fcel>● Merck<ucel><ucel><fcel>● Merck*<ucel><ucel><nl>
<ucel><ucel><fcel>● Novartis<ucel><ucel><fcel>● Portola*<ucel><ucel><nl>
<ucel><ucel><fcel>● Otsuka Maryland Research Institute<ucel><ucel><fcel>● Schering-Plough*<ucel><ucel><nl>
<ucel><ucel><fcel>● Regado<ucel><ucel><ucel><ucel><ucel><nl>
<ucel><ucel><fcel>● Sanofi-aventis<ucel><ucel><ucel><ucel><ucel><nl>
<ucel><ucel><fcel>● Schering-Plough*<ucel><ucel><ucel><ucel><ucel><nl>
<ucel><ucel><fcel>● WebMD*<ucel><ucel><ucel><ucel><ucel><nl>
<fcel>Judith S. Hochman<fcel>Official Reviewer–ACCF/AHA Task Force on Practice Guidelines<fcel>● BMS/Sanofi<fcel>None<fcel>None<fcel>● Johnson & Johnson/Bayer Healthcare AG (DSMB)<fcel>None<fcel>None<nl>
<ucel><ucel><fcel>● Eli Lilly<ucel><ucel><ucel><ucel><ucel><nl>
<ucel><ucel><fcel>● GlaxoSmithKline<ucel><ucel><ucel><ucel><ucel><nl>
<ucel><ucel><fcel>● Millennium Pharmaceuticals/ Schering-Plough<ucel><ucel><fcel>● Schering-Plough (TIMI 50) (DSMB)<ucel><ucel><nl>
<fcel>Rodney H. Zimmermann<fcel>Official Reviewer–ACCF Board of Governors<fcel>● AstraZeneca<fcel>● AstraZeneca<fcel>None<fcel>● AstraZeneca<fcel>None<fcel>None<nl>
<ucel><ucel><fcel>● Boehringer Ingelheim<fcel>● Merck-Frost<fcel>● Sanofi-aventis<ucel><fcel>● Sanofi-aventis<ucel><nl>
<ucel><ucel><fcel>● Bristol-Myers Squibb<fcel>● Servier<ucel><ucel><ucel><ucel><nl>
<ucel><ucel><fcel>● Medtronic<ucel><ucel><ucel><ucel><ucel><nl>
<ucel><ucel><fcel>● Sanofi-aventis<ucel><ucel><ucel><ucel><ucel><nl>
<ucel><ucel><fcel>● Schering-Plough<ucel><ucel><ucel><ucel><ucel><nl>
<fcel>Steven Brown<fcel>Organizational Reviewer–AAFP<fcel>None<fcel>None<fcel>None<fcel>None<fcel>None<fcel>None<nl>
<fcel>Joseph C. Cleveland<fcel>Organizational Reviewer–STS<fcel>● Baxter Biosurgery<fcel>None<fcel>None<fcel>None<fcel>● Heartware<fcel>None<nl>
<ucel><ucel><fcel>● Essential Pharmaceuticals<ucel><ucel><ucel><fcel>● Thoratec<ucel><nl>
<fcel>Wyatt Decker<fcel>Organizational Reviewer–ACEP<fcel>None<fcel>None<fcel>None<fcel>None<fcel>None<fcel>None<nl>
<fcel>Joseph A. de Gregorio<fcel>Organizational Reviewer–SCAI<fcel>None<fcel>None<fcel>None<fcel>None<fcel>None<fcel>None<nl>
<fcel>Deborah B. Diercks<fcel>Organizational Reviewer–ACEP<fcel>● AstraZeneca<fcel>None<fcel>None<fcel>None<fcel>● Society of Chest Pain Centers and Providers<fcel>None<nl>
<ucel><ucel><fcel>● Sanofi-aventis<ucel><ucel><ucel><ucel><ucel><nl>
<ucel><ucel><fcel>● Schering-Plough<ucel><ucel><ucel><ucel><ucel><nl>
<fcel>Benjamin Hatten<fcel>Organizational Reviewer–ACEP<fcel>None<fcel>None<fcel>None<fcel>None<fcel>None<fcel>None<nl>
<fcel>Loren F. Hiratzka<fcel>Organizational Reviewer–STS<fcel>None<fcel>None<fcel>None<fcel>None<fcel>● Cardiac, Vascular, and Thoracic Surgeons*<fcel>None<nl>
<ucel><ucel><ucel><ucel><ucel><ucel><fcel>● TriHealth (Bethesda North and Good Samaritan Hospitals)*<ucel><nl>
<fcel>Jason H. Rogers<fcel>Organizational Reviewer–SCAI<fcel>● Ample Medical<fcel>None<fcel>None<fcel>None<fcel>None<fcel>None<nl>
<fcel>Vincenza T. Show<fcel>Organizational Reviewer–ACP<fcel>None<fcel>None<fcel>None<fcel>● Boehringer Ingelheim*<fcel>● ACP*<fcel>None<nl>
<ucel><ucel><ucel><ucel><ucel><fcel>● Bristol-Myers Squibb*<ucel><ucel><nl>
"""
    print(convert_otsl_to_html(b))
requirements.txt ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Flash Attention - CUDA 12, PyTorch 2.6, Python 3.10
2
+ flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
3
+
4
+ # Core ML/AI Libraries
5
+ torch==2.6.0
6
+ torchvision
7
+ accelerate>=0.24.0
8
+
9
+ # Transformers - using version compatible with both sets of models
10
+ transformers==4.57.1
11
+ tokenizers>=0.20.3
12
+ transformers-stream-generator
13
+
14
+ # Hugging Face
15
+ huggingface_hub
16
+ hf_xet
17
+ spaces>=0.20.0
18
+
19
+ # Vision & Image Processing
20
+ qwen-vl-utils
21
+
22
+ # Web Interface
23
+ gradio==5.9.1
24
+ pydantic==2.10.6