Spaces:

build-small-hackathon
/

Structured-Data-Rescuer

Running

App Files Community

TensorVizion committed on 24 days ago

Commit

0db4a3c

verified ·

1 Parent(s): 866f975

Update app.py

Browse files

Files changed (1) hide show

app.py +6 -60

app.py CHANGED Viewed

@@ -4,11 +4,9 @@ import re
 from huggingface_hub import InferenceClient
 # Replace this with your exact model repo ID
-# e.g., "tensorvizion/O-wen-4.6"
 MODEL_ID = "tensorvizion/O-wen-4.6"
 # Initialize the HF inference client
-# Note: In a private Space, you would pass token=os.environ["HF_TOKEN"]
 client = InferenceClient(model=MODEL_ID)
 def extract_data(raw_text, fields_to_extract):
@@ -37,64 +35,12 @@ def extract_data(raw_text, fields_to_extract):
         response = client.chat_completion(
             messages=messages,
             max_tokens=1024,
-            temperature=0.1, # Low temperature for more deterministic/factual data extraction
         )
-output_text = response.choices[0].message.content.strip()
-        # Fallback: Strip markdown code blocks if the model adds them
-        # This prevents the json parse step from crashing
-        json_match = re.search(r'
-http://googleusercontent.com/immersive_entry_chip/0
-        # Parse the text into an actual JSON dictionary for the Gradio UI
-        structured_data = json.loads(output_text)
-        return structured_data
-    except json.JSONDecodeError:
-        return {
-            "error": "The model failed to return valid JSON. It returned this instead:",
-            "raw_output": output_text
-        }
-    except Exception as e:
-        return {"error": str(e)}
-# -------------------------
-# Build the Gradio UI
-# -------------------------
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🛟 The Data Rescuer")
-    gr.Markdown(f"**Powered by `{MODEL_ID}`** | Turn messy transcripts, notes, and OCR text into clean JSON data.")
-    with gr.Row():
-        # Left Column: Inputs
-        with gr.Column():
-            raw_input = gr.Textbox(
-                label="1. Paste Unstructured Text",
-                placeholder="Paste your messy meeting notes, emails, or raw text here...",
-                lines=12
-            )
-            schema_input = gr.Textbox(
-                label="2. What fields do you want to extract?",
-                placeholder="e.g., Company Name, Contact Person, Deadline, Action Items (list)",
-                lines=3
-            )
-            extract_btn = gr.Button("Extract Structured Data", variant="primary")
-        # Right Column: Output
-        with gr.Column():
-            json_output = gr.JSON(label="Structured Output")
-    # Connect the button to the function
-    extract_btn.click(
-        fn=extract_data,
-        inputs=[raw_input, schema_input],
-        outputs=json_output
-    )
-# Launch the app
-if __name__ == "__main__":
-    demo.launch()

 from huggingface_hub import InferenceClient
 # Replace this with your exact model repo ID
 MODEL_ID = "tensorvizion/O-wen-4.6"
 # Initialize the HF inference client
 client = InferenceClient(model=MODEL_ID)
 def extract_data(raw_text, fields_to_extract):
         response = client.chat_completion(
             messages=messages,
             max_tokens=1024,
+            temperature=0.1,
         )
+        output_text = response.choices[0].message.content.strip()
+        # Fallback: Safely strip markdown code blocks without using complex regex
+        # that might break code editors during copy-pasting
+        if output_text.startswith("```"):
+            # Remove the starting ```json or