Spaces:

build-small-hackathon
/

Structured-Data-Rescuer

Running

App Files Files Community

TensorVizion committed 24 days ago

Commit

789c331

verified ·

1 Parent(s): 7e399de

Update app.py

Browse files

Files changed (1) hide show

app.py +79 -11

app.py CHANGED Viewed

@@ -6,7 +6,7 @@ import tempfile
 from huggingface_hub import InferenceClient
 # Replace this with your exact model repo ID
-MODEL_ID = "meta-llama/Llama-3.1-70B-Instruct"
 # Securely load the Hugging Face token from Space secrets
 hf_token = os.environ.get("HF_TOKEN")
@@ -63,12 +63,71 @@ custom_css = """
 # -------------------------
 # Helper & Extraction Logic
 # -------------------------
 def extract_data(raw_text, fields_to_extract):
     if not hf_token:
-        return {"error": "HF_TOKEN secret is missing. Please add your Hugging Face Access Token to the Space Secrets."}, [["Error", "HF_TOKEN missing"]]
     if not raw_text.strip() or not fields_to_extract.strip():
-        return {"error": "Please provide both raw text and fields to extract."}, [["Error", "Incomplete inputs"]]
     # Construct the system instruction
     system_prompt = (
@@ -115,21 +174,20 @@ def extract_data(raw_text, fields_to_extract):
         table_data = []
         if isinstance(structured_data, dict):
             for k, v in structured_data.items():
-                # Stringify lists or nested objects to fit cleanly inside cells
-                val_str = ", ".join(v) if isinstance(v, list) else str(v)
                 table_data.append([k, val_str])
         elif isinstance(structured_data, list):
             for idx, item in enumerate(structured_data):
                 table_data.append([f"Item {idx + 1}", str(item)])
-        return structured_data, table_data
     except json.JSONDecodeError:
         error_dict = {
             "error": "The model failed to return valid JSON. It returned this instead:",
             "raw_output": output_text
         }
-        return error_dict, [["Error", "Invalid JSON parsed"]]
     except Exception as e:
         error_msg = str(e)
         if "model_not_found" in error_msg or "does not exist" in error_msg:
@@ -141,8 +199,9 @@ def extract_data(raw_text, fields_to_extract):
                     "3. GGUF or LoRA adapter models are not directly supported by the Serverless API."
                 ]
             }
-            return err_dict, [["Connection Error", "Model Not Found"]]
-        return {"error": error_msg}, [["Error", error_msg]]
 def generate_csv(json_data):
     """Converts the JSON output into a downloadable CSV file."""
@@ -216,6 +275,15 @@ with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as demo:
         # Right Column: Multi-view Output Panels
         with gr.Column(scale=1):
             with gr.Tabs():
                 with gr.TabItem("📊 Structured Table"):
                     table_output = gr.Dataframe(
@@ -254,11 +322,11 @@ with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as demo:
     # -------------------------
     # Event Connections
     # -------------------------
-    # 1. Connect extraction button to both the Table View and JSON Tree
     extract_btn.click(
         fn=extract_data,
         inputs=[raw_input, schema_input],
-        outputs=[json_output, table_output]
     )
     # 2. Connect CSV generation

 from huggingface_hub import InferenceClient
 # Replace this with your exact model repo ID
+MODEL_ID = "tensorvizion/O-wen-4.6"
 # Securely load the Hugging Face token from Space secrets
 hf_token = os.environ.get("HF_TOKEN")
 # -------------------------
 # Helper & Extraction Logic
 # -------------------------
def generate_kpi_html(structured_data):
    """Build the HTML for the KPI summary cards from parsed extraction output.

    Args:
        structured_data: The parsed JSON result. A dict yields one card for
            each of its first four items; a list yields a single
            "Total Records Found" card holding its length.

    Returns:
        An HTML string. The dashed placeholder panel is returned when the
        input is empty, is a dict carrying an "error" key, or is neither a
        dict nor a list; otherwise a flex container wrapping the cards.
    """
    # Function-scope import: the file's top-level import block is not part of
    # this block, so the dependency is pulled in locally.
    import html

    placeholder = """
        <div style='display: flex; justify-content: center; align-items: center; height: 100px; border: 2px dashed var(--border-color-primary, #e5e7eb); border-radius: 12px; color: var(--text-color-subdued, #9ca3af);'>
            Await extraction to generate KPI metrics...
        </div>
        """

    # Only dicts and lists can be rendered as cards. Checking the type first
    # avoids the TypeError that `"error" in 5` would raise for scalar JSON.
    if not structured_data or not isinstance(structured_data, (dict, list)):
        return placeholder
    # Error states are dicts with an "error" key; a list that merely contains
    # the string "error" is ordinary data and must still be summarised.
    if isinstance(structured_data, dict) and "error" in structured_data:
        return placeholder

    if isinstance(structured_data, list):
        # Summary KPI for array data structures
        cards_html = f"""
        <div style='background: var(--body-background-fill, #ffffff); padding: 1rem; border-radius: 12px; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.05); border: 1px solid var(--border-color-primary, #e5e7eb); border-left: 5px solid #6366f1; min-width: 140px; flex: 1;'>
            <div style='font-size: 0.7rem; color: var(--text-color-subdued, #6b7280); text-transform: uppercase; font-weight: 700; letter-spacing: 0.05em; margin-bottom: 0.25rem;'>Total Records Found</div>
            <div style='font-size: 1.5rem; color: var(--body-text-color, #111827); font-weight: 800;'>{len(structured_data)}</div>
        </div>
        """
    else:
        # Ordered (keywords, colour) rules; the first matching rule wins and
        # unmatched labels fall back to the default Indigo accent.
        accent_rules = (
            (("price", "total", "amount", "cost", "revenue", "budget"), "#10b981"),  # Emerald for cash/costs
            (("date", "deadline", "due", "time"), "#f59e0b"),  # Amber for dates/reminders
            (("status", "priority", "importance"), "#ef4444"),  # Crimson for status/alerts
        )
        cards = []
        # Pick the top 4 attributes to show as metrics
        for key, val in list(structured_data.items())[:4]:
            # Clean up the key label
            label = str(key).replace("_", " ").replace("-", " ").title()
            # Format list value representation
            if isinstance(val, list):
                text = ", ".join(map(str, val))
            else:
                text = str(val)
            # Truncate before escaping so an HTML entity is never cut in half
            if len(text) > 40:
                text = text[:37] + "..."
            lowered = label.lower()
            accent_color = next(
                (color for words, color in accent_rules if any(w in lowered for w in words)),
                "#6366f1",
            )
            # Keys and values come from parsed model output, so they are
            # escaped before being embedded in the markup.
            safe_label = html.escape(label)
            safe_text = html.escape(text)
            cards.append(f"""
            <div style='background: var(--body-background-fill, #ffffff); padding: 1rem; border-radius: 12px; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.05); border: 1px solid var(--border-color-primary, #e5e7eb); border-left: 5px solid {accent_color}; min-width: 140px; flex: 1;'>
                <div style='font-size: 0.7rem; color: var(--text-color-subdued, #6b7280); text-transform: uppercase; font-weight: 700; letter-spacing: 0.05em; margin-bottom: 0.25rem;'>{safe_label}</div>
                <div style='font-size: 1.05rem; color: var(--body-text-color, #111827); font-weight: 800; word-break: break-word;'>{safe_text}</div>
            </div>
            """)
        cards_html = "".join(cards)

    return f"""
    <div style='display: flex; flex-wrap: wrap; gap: 0.75rem; margin-bottom: 1rem; width: 100%;'>
        {cards_html}
    </div>
    """
 def extract_data(raw_text, fields_to_extract):
     if not hf_token:
+        err_state = {"error": "HF_TOKEN secret is missing. Please add your Hugging Face Access Token to the Space Secrets."}
+        return err_state, [["Error", "HF_TOKEN missing"]], generate_kpi_html(err_state)
     if not raw_text.strip() or not fields_to_extract.strip():
+        err_state = {"error": "Please provide both raw text and fields to extract."}
+        return err_state, [["Error", "Incomplete inputs"]], generate_kpi_html(err_state)
     # Construct the system instruction
     system_prompt = (
         table_data = []
         if isinstance(structured_data, dict):
             for k, v in structured_data.items():
+                val_str = ", ".join(map(str, v)) if isinstance(v, list) else str(v)
                 table_data.append([k, val_str])
         elif isinstance(structured_data, list):
             for idx, item in enumerate(structured_data):
                 table_data.append([f"Item {idx + 1}", str(item)])
+        return structured_data, table_data, generate_kpi_html(structured_data)
     except json.JSONDecodeError:
         error_dict = {
             "error": "The model failed to return valid JSON. It returned this instead:",
             "raw_output": output_text
         }
+        return error_dict, [["Error", "Invalid JSON parsed"]], generate_kpi_html(error_dict)
     except Exception as e:
         error_msg = str(e)
         if "model_not_found" in error_msg or "does not exist" in error_msg:
                     "3. GGUF or LoRA adapter models are not directly supported by the Serverless API."
                 ]
             }
+            return err_dict, [["Connection Error", "Model Not Found"]], generate_kpi_html(err_dict)
+        err_state = {"error": error_msg}
+        return err_state, [["Error", error_msg]], generate_kpi_html(err_state)
 def generate_csv(json_data):
     """Converts the JSON output into a downloadable CSV file."""
         # Right Column: Multi-view Output Panels
         with gr.Column(scale=1):
+            # Dynamic HTML summary cards (Dashboard metrics style)
+            kpi_output = gr.HTML(
+                value="""
+                <div style='display: flex; justify-content: center; align-items: center; height: 100px; border: 2px dashed var(--border-color-primary, #e5e7eb); border-radius: 12px; color: var(--text-color-subdued, #9ca3af);'>
+                    Await extraction to generate KPI metrics...
+                </div>
+                """
+            )
             with gr.Tabs():
                 with gr.TabItem("📊 Structured Table"):
                     table_output = gr.Dataframe(
     # -------------------------
     # Event Connections
     # -------------------------
+    # 1. Connect extraction button to the Table View, JSON Tree, and KPI output
     extract_btn.click(
         fn=extract_data,
         inputs=[raw_input, schema_input],
+        outputs=[json_output, table_output, kpi_output]
     )
     # 2. Connect CSV generation