Spaces:

Deevyankar
/

Handouts

Sleeping

Deevyankar committed on Sep 20, 2025

Commit

42a8447

verified ·

1 Parent(s): 6310783

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -7,7 +7,8 @@ import io
 def extract_text_from_pdf(uploaded_file):
     try:
-        file_bytes = uploaded_file.read()
         doc = fitz.open(stream=file_bytes, filetype="pdf")
         text = ""
         for page in doc:
@@ -16,31 +17,30 @@ def extract_text_from_pdf(uploaded_file):
                 text += page_text + "\n"
         return text.strip()
     except Exception as e:
-        return f"Error: {str(e)}"
 def extract_los(lo_file):
-    name = getattr(lo_file, "name", "")
-    ext = name.lower().split('.')[-1]
-    if ext == "txt":
-        return lo_file.read().decode("utf-8").splitlines()
-    elif ext == "docx":
-        file_bytes = io.BytesIO(lo_file.read())
-        doc = Document(file_bytes)
-        return [p.text.strip() for p in doc.paragraphs if p.text.strip()]
-    else:
-        return []
 def compare_handouts(old_pdf, new_pdf, lo_file):
     old_text = extract_text_from_pdf(old_pdf)
     new_text = extract_text_from_pdf(new_pdf)
     los = extract_los(lo_file)
-    print(old_text)
-    print("\n")
-    print(new_text)
-    print("\n")
-    print(los)
     if not old_text or not new_text:
         return "❗ One or both PDFs may not contain extractable text."

 def extract_text_from_pdf(uploaded_file):
     try:
+        # Fix: handle both bytes and file-like
+        file_bytes = uploaded_file if isinstance(uploaded_file, bytes) else uploaded_file.read()
         doc = fitz.open(stream=file_bytes, filetype="pdf")
         text = ""
         for page in doc:
                 text += page_text + "\n"
         return text.strip()
     except Exception as e:
+        return f"Error extracting text: {str(e)}"
 def extract_los(lo_file):
+    try:
+        file_bytes = lo_file if isinstance(lo_file, bytes) else lo_file.read()
+        name = getattr(lo_file, "name", "")
+        ext = name.lower().split('.')[-1] if name else "docx"
+        if ext == "txt":
+            return file_bytes.decode("utf-8").splitlines()
+        elif ext == "docx":
+            file_stream = io.BytesIO(file_bytes)
+            doc = Document(file_stream)
+            return [p.text.strip() for p in doc.paragraphs if p.text.strip()]
+        else:
+            return []
+    except Exception as e:
+        return [f"Error loading LOs: {str(e)}"]
 def compare_handouts(old_pdf, new_pdf, lo_file):
     old_text = extract_text_from_pdf(old_pdf)
     new_text = extract_text_from_pdf(new_pdf)
     los = extract_los(lo_file)
     if not old_text or not new_text:
         return "❗ One or both PDFs may not contain extractable text."