TensorVizion committed on
Commit
080d73e
·
verified ·
1 Parent(s): f79c1f2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -11
app.py CHANGED
@@ -1,12 +1,12 @@
1
  import gradio as gr
2
  import json
3
- import re
4
  import os
5
  import csv
6
  import tempfile
7
  from huggingface_hub import InferenceClient
8
 
9
  # Replace this with your exact model repo ID
 
10
  MODEL_ID = "tensorvizion/O-wen-4.6"
11
 
12
  # Securely load the Hugging Face token from Space secrets
@@ -22,7 +22,7 @@ def extract_data(raw_text, fields_to_extract):
22
  if not raw_text.strip() or not fields_to_extract.strip():
23
  return {"error": "Please provide both raw text and fields to extract."}
24
 
25
- # Construct the system instruction for O-wen 4.6
26
  system_prompt = (
27
  "You are an expert data extraction assistant. Your job is to extract specific "
28
  "information from messy, unstructured text and output it as clean, valid JSON.\n"
@@ -40,7 +40,7 @@ def extract_data(raw_text, fields_to_extract):
40
  ]
41
 
42
  try:
43
- # Call O-wen 4.6 via the chat completion API
44
  response = client.chat_completion(
45
  messages=messages,
46
  max_tokens=1024,
@@ -49,13 +49,21 @@ def extract_data(raw_text, fields_to_extract):
49
 
50
  output_text = response.choices[0].message.content.strip()
51
 
52
- # Fallback: Safely strip markdown code blocks without using complex regex
53
- if output_text.startswith("```"):
54
- output_text = re.sub(r"^```(?:json)?\n?", "", output_text)
55
- output_text = re.sub(r"\n?```$", "", output_text)
56
-
57
- # Parse the text into an actual JSON dictionary for the Gradio UI
58
- structured_data = json.loads(output_text.strip())
 
 
 
 
 
 
 
 
59
  return structured_data
60
 
61
  except json.JSONDecodeError:
@@ -64,7 +72,20 @@ def extract_data(raw_text, fields_to_extract):
64
  "raw_output": output_text
65
  }
66
  except Exception as e:
67
- return {"error": str(e)}
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
  def generate_csv(json_data):
70
  """Converts the JSON output into a downloadable CSV file."""
@@ -73,6 +94,8 @@ def generate_csv(json_data):
73
 
74
  # Normalize data into a list of dictionaries for the CSV writer
75
  if isinstance(json_data, dict):
 
 
76
  data_list = [json_data]
77
  elif isinstance(json_data, list):
78
  data_list = json_data
@@ -92,6 +115,9 @@ def generate_csv(json_data):
92
  headers.update(item.keys())
93
  headers = list(headers)
94
 
 
 
 
95
  writer = csv.DictWriter(f, fieldnames=headers)
96
  writer.writeheader()
97
 
 
1
  import gradio as gr
2
  import json
 
3
  import os
4
  import csv
5
  import tempfile
6
  from huggingface_hub import InferenceClient
7
 
8
  # Replace this with your exact model repo ID
9
+ # Note: Ensure exact casing. If the model is a GGUF, we will need to change how this runs.
10
  MODEL_ID = "tensorvizion/O-wen-4.6"
11
 
12
  # Securely load the Hugging Face token from Space secrets
 
22
  if not raw_text.strip() or not fields_to_extract.strip():
23
  return {"error": "Please provide both raw text and fields to extract."}
24
 
25
+ # Construct the system instruction
26
  system_prompt = (
27
  "You are an expert data extraction assistant. Your job is to extract specific "
28
  "information from messy, unstructured text and output it as clean, valid JSON.\n"
 
40
  ]
41
 
42
  try:
43
+ # Call the model via the chat completion API
44
  response = client.chat_completion(
45
  messages=messages,
46
  max_tokens=1024,
 
49
 
50
  output_text = response.choices[0].message.content.strip()
51
 
52
+ # Fallback: Safely strip markdown code blocks without regular expressions
53
+ cleaned_text = output_text
54
+ if cleaned_text.startswith("```"):
55
+ lines = cleaned_text.splitlines()
56
+ if len(lines) >= 2:
57
+ # Discard the opening line (e.g., ```json or ```)
58
+ if lines[0].startswith("```"):
59
+ lines = lines[1:]
60
+ # Discard the closing line (e.g., ```)
61
+ if lines and lines[-1].strip() == "```":
62
+ lines = lines[:-1]
63
+ cleaned_text = "\n".join(lines).strip()
64
+
65
+ # Parse the text into an actual JSON dictionary
66
+ structured_data = json.loads(cleaned_text)
67
  return structured_data
68
 
69
  except json.JSONDecodeError:
 
72
  "raw_output": output_text
73
  }
74
  except Exception as e:
75
+ error_msg = str(e)
76
+ # Enhanced error handling for model connectivity issues
77
+ if "model_not_found" in error_msg or "does not exist" in error_msg:
78
+ return {
79
+ "error": f"The model '{MODEL_ID}' was not found on Hugging Face.",
80
+ "troubleshooting": [
81
+ "1. Check your Hugging Face repo for typos in the MODEL_ID string (it is case-sensitive).",
82
+ "2. If the model is Private, ensure your HF_TOKEN has read access.",
83
+ "3. If your model is a GGUF or LoRA adapter, the Serverless API does not support it directly.",
84
+ "Test by temporarily changing MODEL_ID to 'Qwen/Qwen2.5-7B-Instruct' to verify the app works."
85
+ ],
86
+ "raw_error": error_msg
87
+ }
88
+ return {"error": error_msg}
89
 
90
  def generate_csv(json_data):
91
  """Converts the JSON output into a downloadable CSV file."""
 
94
 
95
  # Normalize data into a list of dictionaries for the CSV writer
96
  if isinstance(json_data, dict):
97
+ if "error" in json_data:
98
+ return None
99
  data_list = [json_data]
100
  elif isinstance(json_data, list):
101
  data_list = json_data
 
115
  headers.update(item.keys())
116
  headers = list(headers)
117
 
118
+ if not headers:
119
+ return None
120
+
121
  writer = csv.DictWriter(f, fieldnames=headers)
122
  writer.writeheader()
123