Spaces:

Anushree1
/

MultilingualDocAnalyser

Runtime error

App Files Community

Anushree1 committed on Mar 9, 2025

Commit

46279fd

verified ·

1 Parent(s): e559725

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -21

app.py CHANGED Viewed

@@ -2,15 +2,21 @@ import gradio as gr
 import torch
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
-# Load Fine-Tuned Model & Tokenizer (Ensure path points to your fine-tuned model)
-MODEL_PATH = "path_to_fine_tuned_model"  # Replace with the correct model path
-tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
-model = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH)
-# Set model to evaluation mode (Disables dropout for stable predictions)
-model.eval()
-# Define Label Mapping (Modify based on your dataset)
 LABEL_MAPPING = {
     0: "Contract",
     1: "Invoice",
@@ -19,12 +25,8 @@ LABEL_MAPPING = {
     4: "Marketing Material"
 }
-# Optimized Classification Function
 def classify_text(text):
-    if not text.strip():
-        return "Please enter a valid business document text."
-    # Tokenize Input
     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
     with torch.no_grad():
@@ -33,25 +35,23 @@ def classify_text(text):
     # Convert logits to probabilities
     probs = torch.nn.functional.softmax(outputs.logits, dim=1)
-    # Get predicted label index
     label_idx = torch.argmax(probs, dim=1).item()
     # Retrieve category name
     category = LABEL_MAPPING.get(label_idx, "Unknown")
-    # Debugging Info (Uncomment for testing)
-    print(f"Logits: {outputs.logits}")
-    print(f"Probabilities: {probs}")
-    return f"Predicted Category: {category} (Confidence: {probs[0][label_idx]:.2f})"
 # Gradio UI
 demo = gr.Interface(
     fn=classify_text,
     inputs=gr.Textbox(lines=4, placeholder="Enter business document text..."),
     outputs="text",
-    title="Multilingual Business Document Classifier",
-    description="Classifies business documents into predefined categories using a multilingual model."
 )
-demo.launch()

 import torch
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
+# Define the model path (Update this with your fine-tuned model's path or Hugging Face repo)
+MODEL_PATH = "your-huggingface-username/your-fine-tuned-model"
+# Authenticate if using a private model (Uncomment and set your token)
+# TOKEN = "your_hf_access_token"
+# Load Model & Tokenizer
+try:
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)  # , use_auth_token=TOKEN if needed
+    model = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH)  # , use_auth_token=TOKEN if needed
+except Exception as e:
+    print(f"Error loading model: {e}")
+    exit()
+# Label Mapping
 LABEL_MAPPING = {
     0: "Contract",
     1: "Invoice",
     4: "Marketing Material"
 }
+# Classification Function
 def classify_text(text):
     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
     with torch.no_grad():
     # Convert logits to probabilities
     probs = torch.nn.functional.softmax(outputs.logits, dim=1)
+    # Get top predicted label
     label_idx = torch.argmax(probs, dim=1).item()
+    confidence = probs[0][label_idx].item()
     # Retrieve category name
     category = LABEL_MAPPING.get(label_idx, "Unknown")
+    return f"Predicted Category: {category} (Confidence: {confidence:.2f})"
 # Gradio UI
 demo = gr.Interface(
     fn=classify_text,
     inputs=gr.Textbox(lines=4, placeholder="Enter business document text..."),
     outputs="text",
+    title="Multilingual Business Document Classifier"
 )
+# Run the app
+if __name__ == "__main__":
+    demo.launch()