Spaces:

oberbics
/

HistorySpace

Sleeping

App Files Community

oberbics committed on Apr 13, 2025

Commit

432d5fe

verified ·

1 Parent(s): 39ee1aa

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -30

app.py CHANGED Viewed

@@ -3,26 +3,44 @@ import torch
 import json
 from transformers import AutoModelForCausalLM, AutoTokenizer
-# Simple test function to debug button clicks
 def test_function(template, text):
-    print(f"Function called with template: {template[:30]} and text: {text[:30]}")
     return "Button clicked successfully", "Function was called"
-# Real extraction function
 def extract_info(template, text):
     try:
         # Format prompt according to NuExtract-1.5 requirements
         prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"
         # Tokenize
         inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-        # Generate
         print("Generating output...")
         outputs = model.generate(
             **inputs,
             max_new_tokens=1000,
-            do_sample=False
         )
         # Decode and extract result
@@ -37,33 +55,19 @@ def extract_info(template, text):
         # Try to parse as JSON
         print("Parsing JSON...")
-        extracted = json.loads(json_text)
-        formatted = json.dumps(extracted, indent=2)
         return "✅ Success", formatted
     except Exception as e:
-        print(f"Error: {str(e)}")
         return f"❌ Error: {str(e)}", "{}"
-# Load model
-try:
-    print("Loading model...")
-    model_name = "numind/NuExtract-1.5"
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        torch_dtype=torch.float16,
-        device_map="auto",
-        trust_remote_code=True
-    )
-    print("Model loaded successfully")
-except Exception as e:
-    print(f"Model loading error: {e}")
-    # Create dummy function for testing UI
-    def extract_info(template, text):
-        return "Model failed to load", "Cannot process request"
-# Create a very simple interface
 with gr.Blocks() as demo:
     gr.Markdown("# NuExtract-1.5 Extraction Tool")
@@ -88,7 +92,7 @@ with gr.Blocks() as demo:
             status = gr.Textbox(label="Status")
             output = gr.Textbox(label="Output", lines=10)
-    # Connect both buttons to verify functionality
     test_btn.click(
         fn=test_function,
         inputs=[template, text],
@@ -101,6 +105,5 @@ with gr.Blocks() as demo:
         outputs=[status, output]
     )
-# Launch the app
 if __name__ == "__main__":
     demo.launch()

 import json
 from transformers import AutoModelForCausalLM, AutoTokenizer
+# Initialize model with error handling
+try:
+    tokenizer = AutoTokenizer.from_pretrained("numind/NuExtract-1.5")
+    model = AutoModelForCausalLM.from_pretrained(
+        "numind/NuExtract-1.5",
+        device_map="auto",
+        torch_dtype=torch.float16,
+        trust_remote_code=True
+    )
+    MODEL_LOADED = True
+    print("Model loaded successfully!")
+except Exception as e:
+    MODEL_LOADED = False
+    print(f"Model loading failed: {e}")
 def test_function(template, text):
+    print(f"Test function called with template: {template[:30]} and text: {text[:30]}")
     return "Button clicked successfully", "Function was called"
 def extract_info(template, text):
+    if not MODEL_LOADED:
+        return "❌ Model not loaded", "{}"
     try:
         # Format prompt according to NuExtract-1.5 requirements
         prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"
+        print(f"Processing with prompt: {prompt[:100]}...")
         # Tokenize
         inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+        # Generate with cache disabled
         print("Generating output...")
         outputs = model.generate(
             **inputs,
             max_new_tokens=1000,
+            do_sample=False,
+            use_cache=False  # This disables the problematic cache
         )
         # Decode and extract result
         # Try to parse as JSON
         print("Parsing JSON...")
+        try:
+            extracted = json.loads(json_text)
+            formatted = json.dumps(extracted, indent=2)
+        except json.JSONDecodeError:
+            print(f"JSON parsing failed. Raw output: {json_text[:100]}...")
+            return "❌ JSON parsing error", json_text
         return "✅ Success", formatted
     except Exception as e:
+        print(f"Error in extraction: {str(e)}")
         return f"❌ Error: {str(e)}", "{}"
+# Create a simple interface
 with gr.Blocks() as demo:
     gr.Markdown("# NuExtract-1.5 Extraction Tool")
             status = gr.Textbox(label="Status")
             output = gr.Textbox(label="Output", lines=10)
+    # Connect both buttons
     test_btn.click(
         fn=test_function,
         inputs=[template, text],
         outputs=[status, output]
     )
 if __name__ == "__main__":
     demo.launch()