roshcheeku committed on
Commit
8385a71
·
verified ·
1 Parent(s): 00c7c1d

Update model_utils.py

Browse files
Files changed (1) hide show
  1. model_utils.py +26 -19
model_utils.py CHANGED
@@ -2,38 +2,45 @@ import os
2
 
3
  # Fix: Redirect Hugging Face cache to a writable folder
4
  os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
5
- os.environ["HF_HOME"] = "/tmp/hf_cache" # new standard from transformers v5+
6
 
7
  from transformers import pipeline
8
 
9
- # Initialize zero-shot classification pipeline
10
  classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
11
 
12
- # Define labels for classification
13
  labels = ["question", "option", "answer", "other"]
14
 
15
- def classify_chunk(text):
16
- result = classifier(text, labels)
17
- return result['labels'][0] # Return the top predicted label
18
-
19
  def extract_mcqs_with_model(text):
20
- # Split text into chunks, skipping empty ones
 
 
 
 
21
  chunks = [chunk.strip() for chunk in text.split("\n\n") if chunk.strip()]
22
  mcqs = []
23
  current = {"question": "", "options": [], "answer": ""}
24
 
25
- for chunk in chunks:
26
- label = classify_chunk(chunk)
27
- if label == "question":
28
- if current["question"]:
29
- mcqs.append(current)
30
- current = {"question": "", "options": [], "answer": ""}
31
- current["question"] = chunk
32
- elif label == "option":
33
- current["options"].append(chunk)
34
- elif label == "answer":
35
- current["answer"] = chunk
 
 
 
 
 
 
36
 
37
  if current["question"]:
38
  mcqs.append(current)
 
39
  return mcqs
 
2
 
3
  # Fix: Redirect Hugging Face cache to a writable folder
4
  os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
5
+ os.environ["HF_HOME"] = "/tmp/hf_cache" # New standard from transformers v5+
6
 
7
  from transformers import pipeline
8
 
9
+ # Load the model once at the start — this avoids reloading on every request
10
  classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
11
 
12
+ # Define the classification labels
13
  labels = ["question", "option", "answer", "other"]
14
 
 
 
 
 
15
  def extract_mcqs_with_model(text):
16
+ """
17
+ Extract MCQs from a given large body of text using zero-shot classification.
18
+ Optimized for large documents by batch processing.
19
+ """
20
+ # Clean and split text into meaningful chunks
21
  chunks = [chunk.strip() for chunk in text.split("\n\n") if chunk.strip()]
22
  mcqs = []
23
  current = {"question": "", "options": [], "answer": ""}
24
 
25
+ # Process chunks in batches for speed (e.g., 5 chunks at a time)
26
+ batch_size = 10
27
+ for i in range(0, len(chunks), batch_size):
28
+ batch = chunks[i:i+batch_size]
29
+ results = classifier(batch, labels)
30
+
31
+ for chunk, result in zip(batch, results):
32
+ label = result['labels'][0]
33
+ if label == "question":
34
+ if current["question"]:
35
+ mcqs.append(current)
36
+ current = {"question": "", "options": [], "answer": ""}
37
+ current["question"] = chunk
38
+ elif label == "option":
39
+ current["options"].append(chunk)
40
+ elif label == "answer":
41
+ current["answer"] = chunk
42
 
43
  if current["question"]:
44
  mcqs.append(current)
45
+
46
  return mcqs