Spaces:
Sleeping
Sleeping
| import argparse | |
| import copy | |
| import os | |
| import re | |
| import subprocess | |
| import tempfile | |
| import base64 | |
| from pathlib import Path | |
| import fitz | |
| import gradio as gr | |
| import time | |
| import html | |
| from openai import OpenAI | |
| from s3_uploads import upload_to_s3 | |
| from environs import env | |
stop_generation = False


def stream_from_vllm(messages):
    """Stream completion deltas for *messages* from the hosted GLM-4.1V model.

    Opens a fresh OpenAI-compatible client against the HF router, requests a
    streaming chat completion, and yields each chunk's delta object as it
    arrives. Stops early when the module-level ``stop_generation`` flag is set.
    """
    global stop_generation
    api = OpenAI(
        base_url="https://router.huggingface.co/v1",
        api_key=env.str("HF_API_KEY"),
    )
    stream = api.chat.completions.create(
        model="THUDM/GLM-4.1V-9B-Thinking:novita",
        messages=messages,
        temperature=0.01,
        stream=True,
        max_tokens=8000,
    )
    for chunk in stream:
        if stop_generation:
            break
        choices = chunk.choices
        if choices and choices[0].delta:
            yield choices[0].delta
class GLM4VModel:
    """Helpers for turning uploaded files into chat-completion content parts
    and for streaming the model's markdown answer back to the UI."""

    # File suffix -> MIME type for data-URI encoding; unknown suffixes
    # fall back to image/jpeg (same behavior as the old if/elif ladder).
    _MIME_BY_SUFFIX = {
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.png': 'image/png',
        '.gif': 'image/gif',
        '.bmp': 'image/bmp',
        '.tiff': 'image/tiff',
        '.tif': 'image/tiff',
        '.webp': 'image/webp',
    }

    def _strip_html(self, text: str) -> str:
        """Remove all HTML/XML tags from *text* and trim surrounding whitespace."""
        return re.sub(r"<[^>]+>", "", text).strip()

    def _wrap_text(self, text: str):
        """Wrap plain text in the single-element content-part list the API expects."""
        return [{"type": "text", "text": text}]

    def _image_to_base64(self, image_path):
        """Return the image at *image_path* encoded as a base64 data URI."""
        with open(image_path, "rb") as image_file:
            encoded_string = base64.b64encode(image_file.read()).decode('utf-8')
        mime_type = self._MIME_BY_SUFFIX.get(Path(image_path).suffix.lower(), 'image/jpeg')
        return f"data:{mime_type};base64,{encoded_string}"

    def _pdf_to_imgs(self, pdf_path):
        """Rasterize every page of *pdf_path* to a PNG in the temp dir.

        Returns the list of image paths in page order (180 dpi).
        """
        doc = fitz.open(pdf_path)
        imgs = []
        for i in range(doc.page_count):
            pix = doc.load_page(i).get_pixmap(dpi=180)
            img_p = os.path.join(tempfile.gettempdir(), f"{Path(pdf_path).stem}_{i}.png")
            pix.save(img_p)
            imgs.append(img_p)
        doc.close()
        return imgs

    def _ppt_to_imgs(self, ppt_path):
        """Convert a PowerPoint file to page images via LibreOffice -> PDF -> PNG.

        Requires a `libreoffice` binary on PATH; raises CalledProcessError if
        the headless conversion fails.
        """
        tmp = tempfile.mkdtemp()
        subprocess.run(
            ["libreoffice", "--headless", "--convert-to", "pdf", "--outdir", tmp, ppt_path],
            check=True,
        )
        pdf_path = os.path.join(tmp, Path(ppt_path).stem + ".pdf")
        return self._pdf_to_imgs(pdf_path)

    def _files_to_content(self, media):
        """Map a list of local file paths to API content parts.

        Videos are uploaded as-is; images are uploaded directly; PPT/PPTX and
        PDF files are first rasterized to per-page images. Unrecognized
        extensions are silently skipped.
        """
        out = []
        for f in media or []:
            ext = Path(f).suffix.lower()
            if ext in [".mp4", ".avi", ".mkv", ".mov", ".wmv", ".flv", ".webm", ".mpeg", ".m4v"]:
                out.append({"type": "video_url", "video_url": {"url": upload_to_s3(f)}})
            elif ext in [".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".webp"]:
                out.append({"type": "image_url", "image_url": {"url": upload_to_s3(f)}})
            elif ext in [".ppt", ".pptx"]:
                for p in self._ppt_to_imgs(f):
                    out.append({"type": "image_url", "image_url": {"url": upload_to_s3(p)}})
            elif ext == ".pdf":
                for p in self._pdf_to_imgs(f):
                    out.append({"type": "image_url", "image_url": {"url": upload_to_s3(p)}})
        return out

    def _stream_fragment(self, reasoning_content: str = "", content: str = "", skip_think: bool = True):
        """Render the reasoning + answer buffers as one markdown fragment.

        The reasoning text is wrapped in a collapsible <details> section and
        is omitted entirely unless skip_think is False (the default hides it).
        """
        think_html = ""
        answer_md = ""
        if reasoning_content and not skip_think:
            reasoning_content_clean = reasoning_content.strip()
            think_html = (
                "### 💭 Thinking\n"
                "<details open>\n"
                "<summary>Click to expand</summary>\n\n"
                f"{reasoning_content_clean}\n"
                "</details>\n"
            )
        if content:
            answer_md = content.strip()
        return think_html + "\n\n" + answer_md

    def _build_messages(self, raw_hist, sys_prompt):
        """Convert UI chat history into API messages.

        Prepends a system message when *sys_prompt* is non-blank. Assistant
        turns have their <details> (thinking) section and any HTML tags
        stripped so only the clean answer text is sent back to the model.
        """
        msgs = []
        if sys_prompt.strip():
            msgs.append({"role": "system", "content": [{"type": "text", "text": sys_prompt.strip()}]})
        for h in raw_hist:
            if h["role"] == "user":
                msgs.append({"role": "user", "content": h["content"]})
            else:
                raw = re.sub(r"<details.*?</details>", "", h["content"], flags=re.DOTALL)
                clean_content = self._strip_html(raw).strip()
                if clean_content:
                    msgs.append({"role": "assistant", "content": self._wrap_text(clean_content)})
        return msgs

    def stream_generate(self, raw_hist, sys_prompt: str, *, skip_special_tokens: bool = False):
        """Stream cumulative markdown snapshots of the model's answer.

        *skip_special_tokens* is accepted for interface compatibility but
        unused. Yields the full rendered fragment (thinking hidden by
        default) after each delta; on any exception yields one final
        fragment describing the error.
        """
        global stop_generation
        stop_generation = False
        msgs = self._build_messages(raw_hist, sys_prompt)
        reasoning_buffer = ""
        content_buffer = ""
        try:
            for delta in stream_from_vllm(msgs):
                if stop_generation:
                    break
                # Deltas may arrive as attribute objects or plain dicts, and a
                # single delta can carry BOTH reasoning and answer text, so the
                # two fields are accumulated independently. (The previous
                # elif-chain dropped `content` when both were present and
                # contained an unreachable duplicate branch.)
                if isinstance(delta, dict):
                    reasoning_piece = delta.get('reasoning_content')
                    content_piece = delta.get('content')
                else:
                    reasoning_piece = getattr(delta, 'reasoning_content', None)
                    content_piece = getattr(delta, 'content', None)
                if reasoning_piece:
                    reasoning_buffer += reasoning_piece
                if content_piece:
                    content_buffer += content_piece
                yield self._stream_fragment(reasoning_buffer, content_buffer)
        except Exception as e:
            error_msg = f"Error during streaming: {str(e)}"
            yield self._stream_fragment("", error_msg)
# Shared model wrapper used by the Gradio handler below.
glm4v = GLM4VModel()
# System prompt steering the VLM toward extracting only the bill-of-material
# table. NOTE(review): the misspellings "METERIAL"/"colums" are kept verbatim —
# they may deliberately match the text printed on the drawings; confirm before
# correcting, since changing the prompt changes model behavior.
sys_prompt = """Instructions:
Extract only "BILL OF METERIAL" table containing columns same as it is!
colums: (POSITION, DESCRIPTION, N PIECES, MATERIAL (like SA 516 Gr.70N or SA 105 N), DIMENSIONS(like 1700 I.D. X 2045H 50 THK.), WT.Kgs
Ignore title blocks, revision notes, drawing numbers, and general annotations outside the "BILL OF METERIAL".
If a page contains multiple tables, extract only those explicitly related to BILL OF METERIAL.
Preserve the row and column's order and structure as it is!
Do not include any surrounding decorative lines or borders—only.
give clean tabular data.
output format: markdown table format with following columns (POSITION, DESCRIPTION, N PIECES, MATERIAL, DIMENSIONS(like 1700 I.D. X 2045H 50 THK.) and WT.Kgs)"""
def extract_table_from_file(file):
    """Gradio handler: stream the extracted table as markdown snapshots.

    Yields a progress banner first, then each cumulative chunk from the model
    so the output pane updates live. Errors are yielded as red HTML.
    """
    if file is None:
        # BUG FIX: `return "msg"` inside a generator rides on StopIteration
        # and is never delivered to the UI — the message must be yielded.
        yield "Please upload a file."
        return
    payload = glm4v._files_to_content([file.name])
    raw_hist = [{"role": "user", "content": payload}]
    full_response = ""
    yield "<h2>🌀 Processing...</h2>\n"
    try:
        for chunk in glm4v.stream_generate(raw_hist, sys_prompt):
            full_response = chunk
            yield full_response
    except Exception as e:
        yield f"<div style='color: red;'>Error: {html.escape(str(e))}</div>"
# --- Gradio UI -------------------------------------------------------------
theme = gr.themes.Ocean(
    primary_hue="gray",
)

with gr.Blocks(title="demo", theme=theme) as demo:
    gr.Markdown(
        # BUG FIX: the closing tag was truncated ("</div"), producing
        # malformed HTML in the rendered header.
        "<div style='text-align:center; margin-bottom:20px;'><h1> PDF Extraction Demo</h1></div>"
    )
    with gr.Row():
        with gr.Column():
            up = gr.File(label="Upload File", type="filepath")
            # NOTE(review): format_selector is rendered but never passed to
            # the click handler, so the output is always markdown regardless
            # of the CSV/JSON choice — confirm whether wiring it up is
            # intended (would require extending extract_table_from_file).
            format_selector = gr.Radio(choices=["CSV", "JSON"], label="Output Format", value="CSV")
            submit_btn = gr.Button("Submit", variant="primary")
        with gr.Column():
            output_markdown = gr.Markdown(label="Extracted Table")
    # Generator handler: Gradio streams each yielded value into the pane.
    submit_btn.click(
        extract_table_from_file,
        inputs=[up],
        outputs=[output_markdown],
    )

if __name__ == "__main__":
    demo.launch()