TensorVizion committed on
Commit
5fe2bf9
·
verified ·
1 Parent(s): dfbd07a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +114 -36
app.py CHANGED
@@ -6,8 +6,7 @@ import tempfile
6
  from huggingface_hub import InferenceClient
7
 
8
  # Replace this with your exact model repo ID
9
- # Note: Ensure exact casing. If the model is a GGUF, we will need to change how this runs.
10
- MODEL_ID = "Qwen/Qwen2.5-7B-Instruct"
11
 
12
  # Securely load the Hugging Face token from Space secrets
13
  hf_token = os.environ.get("HF_TOKEN")
@@ -15,12 +14,61 @@ hf_token = os.environ.get("HF_TOKEN")
15
  # Initialize the HF inference client with the token
16
  client = InferenceClient(model=MODEL_ID, token=hf_token)
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  def extract_data(raw_text, fields_to_extract):
19
  if not hf_token:
20
- return {"error": "HF_TOKEN secret is missing. Please add your Hugging Face Access Token to the Space Secrets."}
21
 
22
  if not raw_text.strip() or not fields_to_extract.strip():
23
- return {"error": "Please provide both raw text and fields to extract."}
24
 
25
  # Construct the system instruction
26
  system_prompt = (
@@ -54,48 +102,54 @@ def extract_data(raw_text, fields_to_extract):
54
  if cleaned_text.startswith("```"):
55
  lines = cleaned_text.splitlines()
56
  if len(lines) >= 2:
57
- # Discard the opening line (e.g., ```json or ```)
58
  if lines[0].startswith("```"):
59
  lines = lines[1:]
60
- # Discard the closing line (e.g., ```)
61
  if lines and lines[-1].strip() == "```":
62
  lines = lines[:-1]
63
  cleaned_text = "\n".join(lines).strip()
64
 
65
  # Parse the text into an actual JSON dictionary
66
  structured_data = json.loads(cleaned_text)
67
- return structured_data
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
  except json.JSONDecodeError:
70
- return {
71
  "error": "The model failed to return valid JSON. It returned this instead:",
72
  "raw_output": output_text
73
  }
 
74
  except Exception as e:
75
  error_msg = str(e)
76
- # Enhanced error handling for model connectivity issues
77
  if "model_not_found" in error_msg or "does not exist" in error_msg:
78
- return {
79
  "error": f"The model '{MODEL_ID}' was not found on Hugging Face.",
80
  "troubleshooting": [
81
- "1. Check your Hugging Face repo for typos in the MODEL_ID string (it is case-sensitive).",
82
- "2. If the model is Private, ensure your HF_TOKEN has read access.",
83
- "3. If your model is a GGUF or LoRA adapter, the Serverless API does not support it directly.",
84
- "Test by temporarily changing MODEL_ID to 'Qwen/Qwen2.5-7B-Instruct' to verify the app works."
85
- ],
86
- "raw_error": error_msg
87
  }
88
- return {"error": error_msg}
 
89
 
90
  def generate_csv(json_data):
91
  """Converts the JSON output into a downloadable CSV file."""
92
  if not json_data or "error" in json_data:
93
  return None
94
 
95
- # Normalize data into a list of dictionaries for the CSV writer
96
  if isinstance(json_data, dict):
97
- if "error" in json_data:
98
- return None
99
  data_list = [json_data]
100
  elif isinstance(json_data, list):
101
  data_list = json_data
@@ -108,7 +162,6 @@ def generate_csv(json_data):
108
 
109
  try:
110
  with open(csv_path, 'w', newline='', encoding='utf-8') as f:
111
- # Gather all possible column headers from the JSON keys
112
  headers = set()
113
  for item in data_list:
114
  if isinstance(item, dict):
@@ -123,7 +176,6 @@ def generate_csv(json_data):
123
 
124
  for item in data_list:
125
  if isinstance(item, dict):
126
- # Convert nested lists/dicts to strings so the CSV doesn't break
127
  flat_item = {k: (str(v) if isinstance(v, (list, dict)) else v) for k, v in item.items()}
128
  writer.writerow(flat_item)
129
 
@@ -131,13 +183,23 @@ def generate_csv(json_data):
131
  except Exception as e:
132
  return None
133
 
134
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
135
- gr.Markdown("# πŸ›Ÿ The Data Rescuer")
136
- gr.Markdown(f"**Powered by `{MODEL_ID}`** | Turn messy transcripts, notes, and OCR text into clean JSON and CSV data.")
 
 
 
 
 
 
 
 
 
 
137
 
138
  with gr.Row():
139
  # Left Column: Inputs
140
- with gr.Column():
141
  raw_input = gr.Textbox(
142
  label="1. Paste Unstructured Text",
143
  placeholder="Paste your messy meeting notes, emails, or raw text here...",
@@ -150,16 +212,29 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
150
  lines=3
151
  )
152
 
153
- extract_btn = gr.Button("Extract Structured Data", variant="primary")
154
 
155
- # Right Column: Output
156
- with gr.Column():
157
- json_output = gr.JSON(label="Structured Output")
 
 
 
 
 
 
 
 
 
158
 
159
- # CSV Export UI
160
- export_btn = gr.Button("⬇️ Download as CSV", variant="secondary")
161
- csv_output = gr.File(label="Your CSV File", interactive=False)
 
162
 
 
 
 
163
  gr.Markdown("### Try it out with these examples:")
164
  gr.Examples(
165
  examples=[
@@ -176,14 +251,17 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
176
  label="Click an example to populate the inputs"
177
  )
178
 
179
- # 1. Connect extraction button
 
 
 
180
  extract_btn.click(
181
  fn=extract_data,
182
  inputs=[raw_input, schema_input],
183
- outputs=json_output
184
  )
185
 
186
- # 2. Connect CSV export button
187
  export_btn.click(
188
  fn=generate_csv,
189
  inputs=[json_output],
 
6
  from huggingface_hub import InferenceClient
7
 
8
  # Replace this with your exact model repo ID
9
+ MODEL_ID = "tensorvizion/RagSeek-v1"
 
10
 
11
  # Securely load the Hugging Face token from Space secrets
12
  hf_token = os.environ.get("HF_TOKEN")
 
14
  # Initialize the HF inference client with the token
15
  client = InferenceClient(model=MODEL_ID, token=hf_token)
16
 
17
+ # -------------------------
18
+ # Custom CSS Styling
19
+ # -------------------------
20
+ custom_css = """
21
+ .hero-container {
22
+ background: linear-gradient(135deg, #6366f1 0%, #14b8a6 100%);
23
+ padding: 2.5rem;
24
+ border-radius: 20px;
25
+ color: white;
26
+ margin-bottom: 2rem;
27
+ box-shadow: 0 10px 25px -5px rgba(99, 102, 241, 0.2);
28
+ }
29
+ .hero-container h1 {
30
+ color: white !important;
31
+ font-size: 2.5rem !important;
32
+ font-weight: 800 !important;
33
+ margin-bottom: 0.5rem;
34
+ text-shadow: 0 2px 4px rgba(0,0,0,0.1);
35
+ }
36
+ .hero-container p {
37
+ color: rgba(255, 255, 255, 0.9) !important;
38
+ font-size: 1.1rem !important;
39
+ }
40
+ .primary-btn {
41
+ background: linear-gradient(90deg, #6366f1 0%, #14b8a6 100%) !important;
42
+ border: none !important;
43
+ color: white !important;
44
+ font-weight: 600 !important;
45
+ border-radius: 10px !important;
46
+ transition: all 0.3s ease !important;
47
+ padding: 12px 24px !important;
48
+ }
49
+ .primary-btn:hover {
50
+ transform: translateY(-2px);
51
+ box-shadow: 0 8px 20px -5px rgba(99, 102, 241, 0.4);
52
+ }
53
+ .secondary-btn {
54
+ border-radius: 10px !important;
55
+ font-weight: 600 !important;
56
+ }
57
+ .feedback-card {
58
+ border-left: 4px solid #6366f1;
59
+ background-color: rgba(99, 102, 241, 0.05);
60
+ }
61
+ """
62
+
63
+ # -------------------------
64
+ # Helper & Extraction Logic
65
+ # -------------------------
66
  def extract_data(raw_text, fields_to_extract):
67
  if not hf_token:
68
+ return {"error": "HF_TOKEN secret is missing. Please add your Hugging Face Access Token to the Space Secrets."}, [["Error", "HF_TOKEN missing"]]
69
 
70
  if not raw_text.strip() or not fields_to_extract.strip():
71
+ return {"error": "Please provide both raw text and fields to extract."}, [["Error", "Incomplete inputs"]]
72
 
73
  # Construct the system instruction
74
  system_prompt = (
 
102
  if cleaned_text.startswith("```"):
103
  lines = cleaned_text.splitlines()
104
  if len(lines) >= 2:
 
105
  if lines[0].startswith("```"):
106
  lines = lines[1:]
 
107
  if lines and lines[-1].strip() == "```":
108
  lines = lines[:-1]
109
  cleaned_text = "\n".join(lines).strip()
110
 
111
  # Parse the text into an actual JSON dictionary
112
  structured_data = json.loads(cleaned_text)
113
+
114
+ # Convert JSON structure to a displayable 2D list for the Table view
115
+ table_data = []
116
+ if isinstance(structured_data, dict):
117
+ for k, v in structured_data.items():
118
+ # Stringify lists or nested objects to fit cleanly inside cells
119
+ val_str = ", ".join(v) if isinstance(v, list) else str(v)
120
+ table_data.append([k, val_str])
121
+ elif isinstance(structured_data, list):
122
+ for idx, item in enumerate(structured_data):
123
+ table_data.append([f"Item {idx + 1}", str(item)])
124
+
125
+ return structured_data, table_data
126
 
127
  except json.JSONDecodeError:
128
+ error_dict = {
129
  "error": "The model failed to return valid JSON. It returned this instead:",
130
  "raw_output": output_text
131
  }
132
+ return error_dict, [["Error", "Invalid JSON parsed"]]
133
  except Exception as e:
134
  error_msg = str(e)
 
135
  if "model_not_found" in error_msg or "does not exist" in error_msg:
136
+ err_dict = {
137
  "error": f"The model '{MODEL_ID}' was not found on Hugging Face.",
138
  "troubleshooting": [
139
+ "1. Check your Hugging Face repo for typos (case-sensitive).",
140
+ "2. Verify HF_TOKEN secret read permissions.",
141
+ "3. GGUF or LoRA adapter models are not directly supported by the Serverless API."
142
+ ]
 
 
143
  }
144
+ return err_dict, [["Connection Error", "Model Not Found"]]
145
+ return {"error": error_msg}, [["Error", error_msg]]
146
 
147
  def generate_csv(json_data):
148
  """Converts the JSON output into a downloadable CSV file."""
149
  if not json_data or "error" in json_data:
150
  return None
151
 
 
152
  if isinstance(json_data, dict):
 
 
153
  data_list = [json_data]
154
  elif isinstance(json_data, list):
155
  data_list = json_data
 
162
 
163
  try:
164
  with open(csv_path, 'w', newline='', encoding='utf-8') as f:
 
165
  headers = set()
166
  for item in data_list:
167
  if isinstance(item, dict):
 
176
 
177
  for item in data_list:
178
  if isinstance(item, dict):
 
179
  flat_item = {k: (str(v) if isinstance(v, (list, dict)) else v) for k, v in item.items()}
180
  writer.writerow(flat_item)
181
 
 
183
  except Exception as e:
184
  return None
185
 
186
+ # -------------------------
187
+ # Build the Gradio UI
188
+ # -------------------------
189
+ with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as demo:
190
+
191
+ # Styled Header Block
192
+ with gr.HTML(elem_classes="hero-container"):
193
+ gr.Markdown(
194
+ f"""
195
+ # πŸ›Ÿ The Data Rescuer
196
+ Turn messy logs, disorganized lists, automated transcripts, and raw OCR scripts into highly structured business-ready assets β€” powered by `{MODEL_ID}`.
197
+ """
198
+ )
199
 
200
  with gr.Row():
201
  # Left Column: Inputs
202
+ with gr.Column(scale=1):
203
  raw_input = gr.Textbox(
204
  label="1. Paste Unstructured Text",
205
  placeholder="Paste your messy meeting notes, emails, or raw text here...",
 
212
  lines=3
213
  )
214
 
215
+ extract_btn = gr.Button("πŸš€ Extract Structured Data", variant="primary", elem_classes="primary-btn")
216
 
217
+ # Right Column: Multi-view Output Panels
218
+ with gr.Column(scale=1):
219
+ with gr.Tabs():
220
+ with gr.TabItem("πŸ“Š Structured Table"):
221
+ table_output = gr.Dataframe(
222
+ headers=["Field Name", "Extracted Value"],
223
+ datatype=["str", "str"],
224
+ interactive=False,
225
+ wrap=True
226
+ )
227
+ with gr.TabItem("πŸ” Raw JSON Tree"):
228
+ json_output = gr.JSON(label="JSON Object")
229
 
230
+ # Action controls below outputs
231
+ with gr.Row():
232
+ export_btn = gr.Button("πŸ’Ύ Build Export File", variant="secondary", elem_classes="secondary-btn")
233
+ csv_output = gr.File(label="Ready for Download", interactive=False)
234
 
235
+ # -------------------------
236
+ # Examples Panel
237
+ # -------------------------
238
  gr.Markdown("### Try it out with these examples:")
239
  gr.Examples(
240
  examples=[
 
251
  label="Click an example to populate the inputs"
252
  )
253
 
254
+ # -------------------------
255
+ # Event Connections
256
+ # -------------------------
257
+ # 1. Connect extraction button to both the Table View and JSON Tree
258
  extract_btn.click(
259
  fn=extract_data,
260
  inputs=[raw_input, schema_input],
261
+ outputs=[json_output, table_output]
262
  )
263
 
264
+ # 2. Connect CSV generation
265
  export_btn.click(
266
  fn=generate_csv,
267
  inputs=[json_output],