Spaces:

APEXlogistics
/

ErrorMsgIdentifier

Sleeping

App Files Files Community

joycecast committed on May 1, 2025

Commit

fcad26a

verified ·

1 Parent(s): 1c86181

Create app.py

Browse files

Files changed (1) hide show

app.py +61 -0

app.py ADDED Viewed

	@@ -0,0 +1,61 @@

+import gradio as gr
+import fitz  # PyMuPDF
+import re
+import requests
+from io import BytesIO
+import pandas as pd
def check_pdf_messages(pdf_url, identifiers_input):
    """Find ``Line#`` entries in a remote PDF that carry given Message Identifiers.

    Args:
        pdf_url: URL of the PDF to download and scan.
        identifiers_input: Comma-separated Message Identifiers (e.g.
            ``"523,600"``). Entries that are not purely numeric are ignored,
            and repeated identifiers are only searched once.

    Returns:
        A ``(status, table)`` tuple. ``status`` is a human-readable message.
        ``table`` is a pandas DataFrame with the columns ``"Line#"`` and
        ``"Message Identifier"``, sorted by ``"Line#"``, when matches exist;
        it is ``None`` otherwise (no valid identifiers, download or text
        extraction failure, or no match). Download and extraction failures
        are reported through ``status`` rather than raised.
    """
    # Parse identifiers; dict.fromkeys drops duplicates while keeping order.
    tokens = (token.strip() for token in (identifiers_input or "").split(","))
    identifiers = list(dict.fromkeys(t for t in tokens if t.isdigit()))
    if not identifiers:
        return "❌ No valid Message Identifiers entered.", None

    # Download PDF
    try:
        # Bound the wait so an unresponsive server cannot hang the request.
        response = requests.get(pdf_url, timeout=30)
        response.raise_for_status()
        pdf_bytes = BytesIO(response.content)
    except Exception as e:
        return f"❌ Failed to load PDF: {e}", None

    # Extract text using PyMuPDF; the context manager closes the document.
    try:
        with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
            full_text = "\n".join(page.get_text() for page in doc)
    except Exception as e:
        return f"❌ Failed to extract text from PDF: {e}", None

    # Regex to match Line# + Message Identifier.
    # The (?!\d) lookahead requires the identifier to end there, so "52" does
    # not match the front of "523", and "52|523" can still reach "523".
    id_pattern = "|".join(re.escape(identifier) for identifier in identifiers)
    regex = re.compile(rf"Line#\s+(\d+)\s+({id_pattern})(?!\d)")

    # Parse matches
    matches = [
        {"Line#": int(line_num), "Message Identifier": msg_id}
        for line_num, msg_id in regex.findall(full_text)
    ]
    if not matches:
        return "✅ No matching Message Identifiers found in the PDF.", None

    # Return results as DataFrame
    df = pd.DataFrame(matches).sort_values(by="Line#").reset_index(drop=True)
    return "✅ Matches found:", df
# Gradio UI: two text boxes in, a status line and a results table out.
_input_components = [
    gr.Textbox(label="PDF URL", placeholder="https://.../yourfile.pdf"),
    gr.Textbox(label="Message Identifier List", placeholder="e.g., 523,600"),
]
_output_components = [
    gr.Textbox(label="Status"),
    gr.Dataframe(label="Matching Lines", type="pandas"),
]

# The two outputs line up with the (status, table) tuple the callback returns.
demo = gr.Interface(
    fn=check_pdf_messages,
    inputs=_input_components,
    outputs=_output_components,
    title="PDF Message Identifier Line Checker",
    description="Paste a PDF URL and a comma-separated list of Message Identifiers. This tool finds which Line# entries match those IDs.",
)

# Start the app as soon as the module is executed.
demo.launch()