TensorVizion committed on
Commit
0db4a3c
·
verified ·
1 Parent(s): 866f975

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -60
app.py CHANGED
@@ -4,11 +4,9 @@ import re
4
  from huggingface_hub import InferenceClient
5
 
6
  # Replace this with your exact model repo ID
7
- # e.g., "tensorvizion/O-wen-4.6"
8
  MODEL_ID = "tensorvizion/O-wen-4.6"
9
 
10
  # Initialize the HF inference client
11
- # Note: In a private Space, you would pass token=os.environ["HF_TOKEN"]
12
  client = InferenceClient(model=MODEL_ID)
13
 
14
  def extract_data(raw_text, fields_to_extract):
@@ -37,64 +35,12 @@ def extract_data(raw_text, fields_to_extract):
37
  response = client.chat_completion(
38
  messages=messages,
39
  max_tokens=1024,
40
- temperature=0.1, # Low temperature for more deterministic/factual data extraction
41
  )
42
 
43
- output_text = response.choices[0].message.content.strip()
44
 
45
- # Fallback: Strip markdown code blocks if the model adds them
46
- # This prevents the json parse step from crashing
47
- json_match = re.search(r'
48
- http://googleusercontent.com/immersive_entry_chip/0
49
-
50
-
51
- # Parse the text into an actual JSON dictionary for the Gradio UI
52
- structured_data = json.loads(output_text)
53
- return structured_data
54
-
55
- except json.JSONDecodeError:
56
- return {
57
- "error": "The model failed to return valid JSON. It returned this instead:",
58
- "raw_output": output_text
59
- }
60
- except Exception as e:
61
- return {"error": str(e)}
62
-
63
- # -------------------------
64
- # Build the Gradio UI
65
- # -------------------------
66
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
67
- gr.Markdown("# 🛟 The Data Rescuer")
68
- gr.Markdown(f"**Powered by `{MODEL_ID}`** | Turn messy transcripts, notes, and OCR text into clean JSON data.")
69
-
70
- with gr.Row():
71
- # Left Column: Inputs
72
- with gr.Column():
73
- raw_input = gr.Textbox(
74
- label="1. Paste Unstructured Text",
75
- placeholder="Paste your messy meeting notes, emails, or raw text here...",
76
- lines=12
77
- )
78
-
79
- schema_input = gr.Textbox(
80
- label="2. What fields do you want to extract?",
81
- placeholder="e.g., Company Name, Contact Person, Deadline, Action Items (list)",
82
- lines=3
83
- )
84
-
85
- extract_btn = gr.Button("Extract Structured Data", variant="primary")
86
-
87
- # Right Column: Output
88
- with gr.Column():
89
- json_output = gr.JSON(label="Structured Output")
90
-
91
- # Connect the button to the function
92
- extract_btn.click(
93
- fn=extract_data,
94
- inputs=[raw_input, schema_input],
95
- outputs=json_output
96
- )
97
-
98
- # Launch the app
99
- if __name__ == "__main__":
100
- demo.launch()
 
4
  from huggingface_hub import InferenceClient
5
 
6
  # Replace this with your exact model repo ID
 
7
  MODEL_ID = "tensorvizion/O-wen-4.6"
8
 
9
  # Initialize the HF inference client
 
10
  client = InferenceClient(model=MODEL_ID)
11
 
12
  def extract_data(raw_text, fields_to_extract):
 
35
  response = client.chat_completion(
36
  messages=messages,
37
  max_tokens=1024,
38
+ temperature=0.1,
39
  )
40
 
41
+ output_text = response.choices[0].message.content.strip()
42
 
43
+ # Fallback: Safely strip markdown code blocks without using complex regex
44
+ # that might break code editors during copy-pasting
45
+ if output_text.startswith("```"):
46
+ # Remove the starting ```json or