Spaces:

build-small-hackathon
/

Structured-Data-Rescuer

Running

App Files Files Community

TensorVizion committed 27 days ago

Commit

f79c1f2

verified ·

1 Parent(s): 203b146

Update app.py

Browse files

Files changed (1) hide show

app.py +80 -10

app.py CHANGED Viewed

@@ -1,15 +1,24 @@
 import gradio as gr
 import json
 import re
 from huggingface_hub import InferenceClient
 # Replace this with your exact model repo ID
 MODEL_ID = "tensorvizion/O-wen-4.6"
-# Initialize the HF inference client
-client = InferenceClient(model=MODEL_ID)
 def extract_data(raw_text, fields_to_extract):
     if not raw_text.strip() or not fields_to_extract.strip():
         return {"error": "Please provide both raw text and fields to extract."}
@@ -41,10 +50,8 @@ def extract_data(raw_text, fields_to_extract):
         output_text = response.choices[0].message.content.strip()
         # Fallback: Safely strip markdown code blocks without using complex regex
-        # that might break code editors during copy-pasting
         if output_text.startswith("```"):
-            # Remove the starting ```json or
-            # Remove the ending ```
             output_text = re.sub(r"\n?```$", "", output_text)
         # Parse the text into an actual JSON dictionary for the Gradio UI
@@ -59,12 +66,48 @@ def extract_data(raw_text, fields_to_extract):
     except Exception as e:
         return {"error": str(e)}
-# -------------------------
-# Build the Gradio UI
-# -------------------------
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🛟 The Data Rescuer")
-    gr.Markdown(f"**Powered by `{MODEL_ID}`** | Turn messy transcripts, notes, and OCR text into clean JSON data.")
     with gr.Row():
         # Left Column: Inputs
@@ -86,13 +129,40 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         # Right Column: Output
         with gr.Column():
             json_output = gr.JSON(label="Structured Output")
-    # Connect the button to the function
     extract_btn.click(
         fn=extract_data,
         inputs=[raw_input, schema_input],
         outputs=json_output
     )
 # Launch the app
 if __name__ == "__main__":

 import gradio as gr
 import json
 import re
+import os
+import csv
+import tempfile
 from huggingface_hub import InferenceClient
 # Replace this with your exact model repo ID
 MODEL_ID = "tensorvizion/O-wen-4.6"
+# Securely load the Hugging Face token from Space secrets
+hf_token = os.environ.get("HF_TOKEN")
+# Initialize the HF inference client with the token
+client = InferenceClient(model=MODEL_ID, token=hf_token)
 def extract_data(raw_text, fields_to_extract):
+    if not hf_token:
+        return {"error": "HF_TOKEN secret is missing. Please add your Hugging Face Access Token to the Space Secrets."}
     if not raw_text.strip() or not fields_to_extract.strip():
         return {"error": "Please provide both raw text and fields to extract."}
         output_text = response.choices[0].message.content.strip()
         # Fallback: Safely strip markdown code blocks without using complex regex
         if output_text.startswith("```"):
+            output_text = re.sub(r"^```(?:json)?\n?", "", output_text)
             output_text = re.sub(r"\n?```$", "", output_text)
         # Parse the text into an actual JSON dictionary for the Gradio UI
     except Exception as e:
         return {"error": str(e)}
def generate_csv(json_data):
    """Convert extracted JSON data into a downloadable CSV file.

    Args:
        json_data: Either a single dict (written as one row) or a list of
            dicts (one row each). Non-dict items inside a list are skipped.

    Returns:
        The path to an ``extracted_data.csv`` file inside a freshly created
        temporary directory, or ``None`` when there is nothing to export
        (empty input, an ``"error"`` payload, an unsupported type, no usable
        columns) or when the file could not be written.
    """
    # Check the type first: running `"error" in json_data` on an arbitrary
    # value raises TypeError for non-containers and does a substring match
    # on strings.
    if isinstance(json_data, dict):
        # A top-level "error" key marks a failure payload, not real data.
        if not json_data or "error" in json_data:
            return None
        rows = [json_data]
    elif isinstance(json_data, list):
        rows = [item for item in json_data if isinstance(item, dict)]
    else:
        return None

    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # column order is stable across runs (a set would shuffle it).
    headers = list(dict.fromkeys(key for row in rows for key in row))
    if not headers:
        # Bail out before touching the filesystem: an empty CSV is useless.
        return None

    temp_dir = tempfile.mkdtemp()
    csv_path = os.path.join(temp_dir, "extracted_data.csv")
    try:
        # newline="" is required by the csv module to avoid doubled line
        # endings on some platforms.
        with open(csv_path, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=headers)
            writer.writeheader()
            for row in rows:
                # Serialize nested lists/dicts as JSON so each stays in one
                # cell and can be parsed back later; keys missing from a row
                # are filled with DictWriter's default empty string.
                writer.writerow({
                    key: (
                        json.dumps(value, ensure_ascii=False)
                        if isinstance(value, (list, dict))
                        else value
                    )
                    for key, value in row.items()
                })
    except (OSError, csv.Error, ValueError):
        # Best-effort export: signal failure to the UI instead of raising.
        return None
    return csv_path
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🛟 The Data Rescuer")
+    gr.Markdown(f"**Powered by `{MODEL_ID}`** | Turn messy transcripts, notes, and OCR text into clean JSON and CSV data.")
     with gr.Row():
         # Left Column: Inputs
         # Right Column: Output
         with gr.Column():
             json_output = gr.JSON(label="Structured Output")
+            # CSV Export UI
+            export_btn = gr.Button("⬇️ Download as CSV", variant="secondary")
+            csv_output = gr.File(label="Your CSV File", interactive=False)
+    gr.Markdown("### Try it out with these examples:")
+    gr.Examples(
+        examples=[
+            [
+                "Hey guys, quick recap of today's sync. Sarah is going to handle the frontend React components by next Tuesday. John, you need to fix the database migration issue before Friday. Also, our client 'Acme Corp' wants the final delivery by October 15th.",
+                "Task Owner, Task Description, Deadline, Client Name"
+            ],
+            [
+                "Invoice #99214. From: BlueTech Software. To: Jane Doe. Items: 1x Server Maintenance ($500), 2x Cloud Storage ($100 each). Total due: $700. Please pay by end of month.",
+                "Invoice Number, Sender, Recipient, Items (list of names and prices), Total Amount"
+            ]
+        ],
+        inputs=[raw_input, schema_input],
+        label="Click an example to populate the inputs"
+    )
+    # 1. Connect extraction button
     extract_btn.click(
         fn=extract_data,
         inputs=[raw_input, schema_input],
         outputs=json_output
     )
+    # 2. Connect CSV export button
+    export_btn.click(
+        fn=generate_csv,
+        inputs=[json_output],
+        outputs=[csv_output]
+    )
 # Launch the app
 if __name__ == "__main__":