Spaces:

build-small-hackathon
/

Structured-Data-Rescuer

Running

App Files Files Community

TensorVizion committed on 25 days ago

Commit

5fe2bf9

verified ·

1 Parent(s): dfbd07a

Update app.py

Browse files

Files changed (1) hide show

app.py +114 -36

app.py CHANGED Viewed

@@ -6,8 +6,7 @@ import tempfile
 from huggingface_hub import InferenceClient
 # Replace this with your exact model repo ID
-# Note: Ensure exact casing. If the model is a GGUF, we will need to change how this runs.
-MODEL_ID = "Qwen/Qwen2.5-7B-Instruct"
 # Securely load the Hugging Face token from Space secrets
 hf_token = os.environ.get("HF_TOKEN")
@@ -15,12 +14,61 @@ hf_token = os.environ.get("HF_TOKEN")
 # Initialize the HF inference client with the token
 client = InferenceClient(model=MODEL_ID, token=hf_token)
 def extract_data(raw_text, fields_to_extract):
     if not hf_token:
-        return {"error": "HF_TOKEN secret is missing. Please add your Hugging Face Access Token to the Space Secrets."}
     if not raw_text.strip() or not fields_to_extract.strip():
-        return {"error": "Please provide both raw text and fields to extract."}
     # Construct the system instruction
     system_prompt = (
@@ -54,48 +102,54 @@ def extract_data(raw_text, fields_to_extract):
         if cleaned_text.startswith("```"):
             lines = cleaned_text.splitlines()
             if len(lines) >= 2:
-                # Discard the opening line (e.g., ```json or ```)
                 if lines[0].startswith("```"):
                     lines = lines[1:]
-                # Discard the closing line (e.g., ```)
                 if lines and lines[-1].strip() == "```":
                     lines = lines[:-1]
                 cleaned_text = "\n".join(lines).strip()
         # Parse the text into an actual JSON dictionary
         structured_data = json.loads(cleaned_text)
-        return structured_data
     except json.JSONDecodeError:
-        return {
             "error": "The model failed to return valid JSON. It returned this instead:",
             "raw_output": output_text
         }
     except Exception as e:
         error_msg = str(e)
-        # Enhanced error handling for model connectivity issues
         if "model_not_found" in error_msg or "does not exist" in error_msg:
-            return {
                 "error": f"The model '{MODEL_ID}' was not found on Hugging Face.",
                 "troubleshooting": [
-                    "1. Check your Hugging Face repo for typos in the MODEL_ID string (it is case-sensitive).",
-                    "2. If the model is Private, ensure your HF_TOKEN has read access.",
-                    "3. If your model is a GGUF or LoRA adapter, the Serverless API does not support it directly.",
-                    "Test by temporarily changing MODEL_ID to 'Qwen/Qwen2.5-7B-Instruct' to verify the app works."
-                ],
-                "raw_error": error_msg
             }
-        return {"error": error_msg}
 def generate_csv(json_data):
     """Converts the JSON output into a downloadable CSV file."""
     if not json_data or "error" in json_data:
         return None
-    # Normalize data into a list of dictionaries for the CSV writer
     if isinstance(json_data, dict):
-        if "error" in json_data:
-            return None
         data_list = [json_data]
     elif isinstance(json_data, list):
         data_list = json_data
@@ -108,7 +162,6 @@ def generate_csv(json_data):
     try:
         with open(csv_path, 'w', newline='', encoding='utf-8') as f:
-            # Gather all possible column headers from the JSON keys
             headers = set()
             for item in data_list:
                 if isinstance(item, dict):
@@ -123,7 +176,6 @@ def generate_csv(json_data):
             for item in data_list:
                 if isinstance(item, dict):
-                    # Convert nested lists/dicts to strings so the CSV doesn't break
                     flat_item = {k: (str(v) if isinstance(v, (list, dict)) else v) for k, v in item.items()}
                     writer.writerow(flat_item)
@@ -131,13 +183,23 @@ def generate_csv(json_data):
     except Exception as e:
         return None
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🛟 The Data Rescuer")
-    gr.Markdown(f"**Powered by `{MODEL_ID}`** | Turn messy transcripts, notes, and OCR text into clean JSON and CSV data.")
     with gr.Row():
         # Left Column: Inputs
-        with gr.Column():
             raw_input = gr.Textbox(
                 label="1. Paste Unstructured Text",
                 placeholder="Paste your messy meeting notes, emails, or raw text here...",
@@ -150,16 +212,29 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
                 lines=3
             )
-            extract_btn = gr.Button("Extract Structured Data", variant="primary")
-        # Right Column: Output
-        with gr.Column():
-            json_output = gr.JSON(label="Structured Output")
-            # CSV Export UI
-            export_btn = gr.Button("⬇️ Download as CSV", variant="secondary")
-            csv_output = gr.File(label="Your CSV File", interactive=False)
     gr.Markdown("### Try it out with these examples:")
     gr.Examples(
         examples=[
@@ -176,14 +251,17 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         label="Click an example to populate the inputs"
     )
-    # 1. Connect extraction button
     extract_btn.click(
         fn=extract_data,
         inputs=[raw_input, schema_input],
-        outputs=json_output
     )
-    # 2. Connect CSV export button
     export_btn.click(
         fn=generate_csv,
         inputs=[json_output],

 from huggingface_hub import InferenceClient
 # Replace this with your exact model repo ID
+MODEL_ID = "tensorvizion/RagSeek-v1"
 # Securely load the Hugging Face token from Space secrets
 hf_token = os.environ.get("HF_TOKEN")
 # Initialize the HF inference client with the token
 client = InferenceClient(model=MODEL_ID, token=hf_token)
+# -------------------------
+# Custom CSS Styling
+# -------------------------
+custom_css = """
+.hero-container {
+    background: linear-gradient(135deg, #6366f1 0%, #14b8a6 100%);
+    padding: 2.5rem;
+    border-radius: 20px;
+    color: white;
+    margin-bottom: 2rem;
+    box-shadow: 0 10px 25px -5px rgba(99, 102, 241, 0.2);
+}
+.hero-container h1 {
+    color: white !important;
+    font-size: 2.5rem !important;
+    font-weight: 800 !important;
+    margin-bottom: 0.5rem;
+    text-shadow: 0 2px 4px rgba(0,0,0,0.1);
+}
+.hero-container p {
+    color: rgba(255, 255, 255, 0.9) !important;
+    font-size: 1.1rem !important;
+}
+.primary-btn {
+    background: linear-gradient(90deg, #6366f1 0%, #14b8a6 100%) !important;
+    border: none !important;
+    color: white !important;
+    font-weight: 600 !important;
+    border-radius: 10px !important;
+    transition: all 0.3s ease !important;
+    padding: 12px 24px !important;
+}
+.primary-btn:hover {
+    transform: translateY(-2px);
+    box-shadow: 0 8px 20px -5px rgba(99, 102, 241, 0.4);
+}
+.secondary-btn {
+    border-radius: 10px !important;
+    font-weight: 600 !important;
+}
+.feedback-card {
+    border-left: 4px solid #6366f1;
+    background-color: rgba(99, 102, 241, 0.05);
+}
+"""
+# -------------------------
+# Helper & Extraction Logic
+# -------------------------
 def extract_data(raw_text, fields_to_extract):
     if not hf_token:
+        return {"error": "HF_TOKEN secret is missing. Please add your Hugging Face Access Token to the Space Secrets."}, [["Error", "HF_TOKEN missing"]]
     if not raw_text.strip() or not fields_to_extract.strip():
+        return {"error": "Please provide both raw text and fields to extract."}, [["Error", "Incomplete inputs"]]
     # Construct the system instruction
     system_prompt = (
         if cleaned_text.startswith("```"):
             lines = cleaned_text.splitlines()
             if len(lines) >= 2:
                 if lines[0].startswith("```"):
                     lines = lines[1:]
                 if lines and lines[-1].strip() == "```":
                     lines = lines[:-1]
                 cleaned_text = "\n".join(lines).strip()
         # Parse the text into an actual JSON dictionary
         structured_data = json.loads(cleaned_text)
+        # Convert JSON structure to a displayable 2D list for the Table view
+        table_data = []
+        if isinstance(structured_data, dict):
+            for k, v in structured_data.items():
+                # Stringify lists or nested objects to fit cleanly inside cells
+                val_str = ", ".join(v) if isinstance(v, list) else str(v)
+                table_data.append([k, val_str])
+        elif isinstance(structured_data, list):
+            for idx, item in enumerate(structured_data):
+                table_data.append([f"Item {idx + 1}", str(item)])
+        return structured_data, table_data
     except json.JSONDecodeError:
+        error_dict = {
             "error": "The model failed to return valid JSON. It returned this instead:",
             "raw_output": output_text
         }
+        return error_dict, [["Error", "Invalid JSON parsed"]]
     except Exception as e:
         error_msg = str(e)
         if "model_not_found" in error_msg or "does not exist" in error_msg:
+            err_dict = {
                 "error": f"The model '{MODEL_ID}' was not found on Hugging Face.",
                 "troubleshooting": [
+                    "1. Check your Hugging Face repo for typos (case-sensitive).",
+                    "2. Verify HF_TOKEN secret read permissions.",
+                    "3. GGUF or LoRA adapter models are not directly supported by the Serverless API."
+                ]
             }
+            return err_dict, [["Connection Error", "Model Not Found"]]
+        return {"error": error_msg}, [["Error", error_msg]]
 def generate_csv(json_data):
     """Converts the JSON output into a downloadable CSV file."""
     if not json_data or "error" in json_data:
         return None
     if isinstance(json_data, dict):
         data_list = [json_data]
     elif isinstance(json_data, list):
         data_list = json_data
     try:
         with open(csv_path, 'w', newline='', encoding='utf-8') as f:
             headers = set()
             for item in data_list:
                 if isinstance(item, dict):
             for item in data_list:
                 if isinstance(item, dict):
                     flat_item = {k: (str(v) if isinstance(v, (list, dict)) else v) for k, v in item.items()}
                     writer.writerow(flat_item)
     except Exception as e:
         return None
+# -------------------------
+# Build the Gradio UI
+# -------------------------
+with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as demo:
+    # Styled Header Block
+    with gr.HTML(elem_classes="hero-container"):
+        gr.Markdown(
+            f"""
+            # 🛟 The Data Rescuer
+            Turn messy logs, disorganized lists, automated transcripts, and raw OCR scripts into highly structured business-ready assets — powered by `{MODEL_ID}`.
+            """
+        )
     with gr.Row():
         # Left Column: Inputs
+        with gr.Column(scale=1):
             raw_input = gr.Textbox(
                 label="1. Paste Unstructured Text",
                 placeholder="Paste your messy meeting notes, emails, or raw text here...",
                 lines=3
             )
+            extract_btn = gr.Button("🚀 Extract Structured Data", variant="primary", elem_classes="primary-btn")
+        # Right Column: Multi-view Output Panels
+        with gr.Column(scale=1):
+            with gr.Tabs():
+                with gr.TabItem("📊 Structured Table"):
+                    table_output = gr.Dataframe(
+                        headers=["Field Name", "Extracted Value"],
+                        datatype=["str", "str"],
+                        interactive=False,
+                        wrap=True
+                    )
+                with gr.TabItem("🔍 Raw JSON Tree"):
+                    json_output = gr.JSON(label="JSON Object")
+            # Action controls below outputs
+            with gr.Row():
+                export_btn = gr.Button("💾 Build Export File", variant="secondary", elem_classes="secondary-btn")
+                csv_output = gr.File(label="Ready for Download", interactive=False)
+    # -------------------------
+    # Examples Panel
+    # -------------------------
     gr.Markdown("### Try it out with these examples:")
     gr.Examples(
         examples=[
         label="Click an example to populate the inputs"
     )
+    # -------------------------
+    # Event Connections
+    # -------------------------
+    # 1. Connect extraction button to both the Table View and JSON Tree
     extract_btn.click(
         fn=extract_data,
         inputs=[raw_input, schema_input],
+        outputs=[json_output, table_output]
     )
+    # 2. Connect CSV generation
     export_btn.click(
         fn=generate_csv,
         inputs=[json_output],