TensorVizion committed on
Commit
f79c1f2
·
verified ·
1 Parent(s): 203b146

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -10
app.py CHANGED
@@ -1,15 +1,24 @@
1
  import gradio as gr
2
  import json
3
  import re
 
 
 
4
  from huggingface_hub import InferenceClient
5
 
6
  # Replace this with your exact model repo ID
7
  MODEL_ID = "tensorvizion/O-wen-4.6"
8
 
9
- # Initialize the HF inference client
10
- client = InferenceClient(model=MODEL_ID)
 
 
 
11
 
12
  def extract_data(raw_text, fields_to_extract):
 
 
 
13
  if not raw_text.strip() or not fields_to_extract.strip():
14
  return {"error": "Please provide both raw text and fields to extract."}
15
 
@@ -41,10 +50,8 @@ def extract_data(raw_text, fields_to_extract):
41
  output_text = response.choices[0].message.content.strip()
42
 
43
  # Fallback: Safely strip markdown code blocks without using complex regex
44
- # that might break code editors during copy-pasting
45
  if output_text.startswith("```"):
46
- # Remove the starting ```json or
47
- # Remove the ending ```
48
  output_text = re.sub(r"\n?```$", "", output_text)
49
 
50
  # Parse the text into an actual JSON dictionary for the Gradio UI
@@ -59,12 +66,48 @@ def extract_data(raw_text, fields_to_extract):
59
  except Exception as e:
60
  return {"error": str(e)}
61
 
62
- # -------------------------
63
- # Build the Gradio UI
64
- # -------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
66
  gr.Markdown("# 🛟 The Data Rescuer")
67
- gr.Markdown(f"**Powered by `{MODEL_ID}`** | Turn messy transcripts, notes, and OCR text into clean JSON data.")
68
 
69
  with gr.Row():
70
  # Left Column: Inputs
@@ -86,13 +129,40 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
86
  # Right Column: Output
87
  with gr.Column():
88
  json_output = gr.JSON(label="Structured Output")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
- # Connect the button to the function
91
  extract_btn.click(
92
  fn=extract_data,
93
  inputs=[raw_input, schema_input],
94
  outputs=json_output
95
  )
 
 
 
 
 
 
 
96
 
97
  # Launch the app
98
  if __name__ == "__main__":
 
1
  import gradio as gr
2
  import json
3
  import re
4
+ import os
5
+ import csv
6
+ import tempfile
7
  from huggingface_hub import InferenceClient
8
 
9
  # Replace this with your exact model repo ID
10
  MODEL_ID = "tensorvizion/O-wen-4.6"
11
 
12
+ # Securely load the Hugging Face token from Space secrets
13
+ hf_token = os.environ.get("HF_TOKEN")
14
+
15
+ # Initialize the HF inference client with the token
16
+ client = InferenceClient(model=MODEL_ID, token=hf_token)
17
 
18
  def extract_data(raw_text, fields_to_extract):
19
+ if not hf_token:
20
+ return {"error": "HF_TOKEN secret is missing. Please add your Hugging Face Access Token to the Space Secrets."}
21
+
22
  if not raw_text.strip() or not fields_to_extract.strip():
23
  return {"error": "Please provide both raw text and fields to extract."}
24
 
 
50
  output_text = response.choices[0].message.content.strip()
51
 
52
  # Fallback: Safely strip markdown code blocks without using complex regex
 
53
  if output_text.startswith("```"):
54
+ output_text = re.sub(r"^```(?:json)?\n?", "", output_text)
 
55
  output_text = re.sub(r"\n?```$", "", output_text)
56
 
57
  # Parse the text into an actual JSON dictionary for the Gradio UI
 
66
  except Exception as e:
67
  return {"error": str(e)}
68
 
69
def generate_csv(json_data):
    """Write extracted JSON data to a CSV file and return the file's path.

    Args:
        json_data: The value held by the JSON output component. A single
            dict becomes one CSV row; a list contributes one row per dict
            element (non-dict elements are skipped).

    Returns:
        The path to ``extracted_data.csv`` inside a fresh temporary
        directory, or ``None`` when there is nothing to export (empty
        input, an ``{"error": ...}`` payload, an unsupported type, no dict
        rows) or when the file could not be written.
    """
    if not json_data:
        return None

    # Normalize the input into a list of candidate rows.
    if isinstance(json_data, dict):
        # A dict carrying an "error" key is a failure report, not data.
        # The check is dict-only: on a list, `in` would test element
        # membership rather than look for a key.
        if "error" in json_data:
            return None
        candidates = [json_data]
    elif isinstance(json_data, list):
        candidates = json_data
    else:
        return None

    rows = [item for item in candidates if isinstance(item, dict)]

    # Collect column headers in first-seen order. dict.fromkeys keeps the
    # column order stable between runs, which a set does not guarantee.
    headers = list(dict.fromkeys(key for row in rows for key in row))
    if not headers:
        return None

    # Validate before touching the filesystem so rejected input does not
    # leave an empty temporary directory behind.
    temp_dir = tempfile.mkdtemp()
    csv_path = os.path.join(temp_dir, "extracted_data.csv")

    try:
        with open(csv_path, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=headers)
            writer.writeheader()
            for row in rows:
                # Nested lists/dicts are stored as JSON text so the cell
                # can be parsed back; str() would emit Python repr instead.
                writer.writerow(
                    {
                        key: (
                            json.dumps(value, ensure_ascii=False)
                            if isinstance(value, (list, dict))
                            else value
                        )
                        for key, value in row.items()
                    }
                )
    except (OSError, csv.Error, TypeError, ValueError):
        # Best effort: the caller treats None as "no file available".
        return None

    return csv_path
107
+
108
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
109
  gr.Markdown("# 🛟 The Data Rescuer")
110
+ gr.Markdown(f"**Powered by `{MODEL_ID}`** | Turn messy transcripts, notes, and OCR text into clean JSON and CSV data.")
111
 
112
  with gr.Row():
113
  # Left Column: Inputs
 
129
  # Right Column: Output
130
  with gr.Column():
131
  json_output = gr.JSON(label="Structured Output")
132
+
133
+ # CSV Export UI
134
+ export_btn = gr.Button("⬇️ Download as CSV", variant="secondary")
135
+ csv_output = gr.File(label="Your CSV File", interactive=False)
136
+
137
+ gr.Markdown("### Try it out with these examples:")
138
+ gr.Examples(
139
+ examples=[
140
+ [
141
+ "Hey guys, quick recap of today's sync. Sarah is going to handle the frontend React components by next Tuesday. John, you need to fix the database migration issue before Friday. Also, our client 'Acme Corp' wants the final delivery by October 15th.",
142
+ "Task Owner, Task Description, Deadline, Client Name"
143
+ ],
144
+ [
145
+ "Invoice #99214. From: BlueTech Software. To: Jane Doe. Items: 1x Server Maintenance ($500), 2x Cloud Storage ($100 each). Total due: $700. Please pay by end of month.",
146
+ "Invoice Number, Sender, Recipient, Items (list of names and prices), Total Amount"
147
+ ]
148
+ ],
149
+ inputs=[raw_input, schema_input],
150
+ label="Click an example to populate the inputs"
151
+ )
152
 
153
+ # 1. Connect extraction button
154
  extract_btn.click(
155
  fn=extract_data,
156
  inputs=[raw_input, schema_input],
157
  outputs=json_output
158
  )
159
+
160
+ # 2. Connect CSV export button
161
+ export_btn.click(
162
+ fn=generate_csv,
163
+ inputs=[json_output],
164
+ outputs=[csv_output]
165
+ )
166
 
167
  # Launch the app
168
  if __name__ == "__main__":