Spaces:

Deevyankar
/

Handouts

Sleeping

App Files Files Community

Deevyankar committed on Sep 20, 2025

Commit

8e6dd39

verified ·

1 Parent(s): bcf0a3a

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -19

app.py CHANGED Viewed

@@ -11,9 +11,8 @@ import numpy as np
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
-def extract_text_from_pdf(uploaded_file):
-    file_bytes = uploaded_file.read()
-    doc = fitz.open(stream=file_bytes, filetype="pdf")
     text = ""
     for page in doc:
         page_text = page.get_text()
@@ -21,18 +20,15 @@ def extract_text_from_pdf(uploaded_file):
             text += page_text + "\n"
     return text.strip()
-def extract_los(lo_file):
-    file_bytes = lo_file.read()
-    ext = lo_file.name.lower().split('.')[-1]
     if ext == "txt":
         return file_bytes.decode("utf-8").splitlines()
     elif ext == "docx":
         file_stream = io.BytesIO(file_bytes)
         doc = Document(file_stream)
         return [p.text.strip() for p in doc.paragraphs if p.text.strip()]
-    else:
-        return []
 def quality_check(new_text):
     words = new_text.split()
@@ -49,13 +45,13 @@ def find_relevant_los(content, los):
     vectorizer = TfidfVectorizer().fit_transform([content] + los)
     similarities = cosine_similarity(vectorizer[0:1], vectorizer[1:]).flatten()
     matched = []
-    scores_old = [round(np.random.uniform(1, 3), 1) for _ in los]  # simulate old matches
     scores_new = []
     for i, score in enumerate(similarities):
         if score > 0.2:
             matched.append(f"✓ {los[i]} (Match: {score:.2f})")
-        scores_new.append(round(score * 5, 1))  # convert similarity to scale of 5
     return matched, len(matched), scores_old, scores_new
@@ -86,13 +82,13 @@ def create_bar_chart(los, scores_old, scores_new):
     fig.tight_layout()
     return fig
-def compare_handouts(old_pdf, new_pdf, lo_file):
-    old_text = extract_text_from_pdf(old_pdf)
-    new_text = extract_text_from_pdf(new_pdf)
-    los = extract_los(lo_file)
     if not old_text or not new_text:
-        return "❗ Error in file(s)", "", "", None, None
     added_summary, added_lines, total_lines = summarize_added_lines(old_text, new_text)
     percent_change = (added_lines / max(total_lines, 1)) * 100
@@ -111,7 +107,7 @@ def compare_handouts(old_pdf, new_pdf, lo_file):
     return summary_output, lo_output, stats, chart
 iface = gr.Interface(
-    fn=compare_handouts,
     inputs=[
         gr.File(label="📤 Old Handout PDF", type="binary"),
         gr.File(label="📥 New Handout PDF", type="binary"),
@@ -123,8 +119,8 @@ iface = gr.Interface(
         gr.Textbox(label="📊 Stats & Quality", lines=5),
         gr.Plot(label="📉 LO Match Score Chart")
     ],
-    title="📘 Handout Comparator: Summary, LO, and Chart",
-    description="Uploads two handouts + learning outcomes file. Compares updated content, matches with LOs, and charts LO match scores."
 )
 iface.launch()

 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
+def extract_text_from_pdf(uploaded_file_bytes):
+    doc = fitz.open(stream=uploaded_file_bytes, filetype="pdf")
     text = ""
     for page in doc:
         page_text = page.get_text()
             text += page_text + "\n"
     return text.strip()
+def extract_los(file_bytes, filename=""):
+    ext = filename.lower().split('.')[-1]
     if ext == "txt":
         return file_bytes.decode("utf-8").splitlines()
     elif ext == "docx":
         file_stream = io.BytesIO(file_bytes)
         doc = Document(file_stream)
         return [p.text.strip() for p in doc.paragraphs if p.text.strip()]
+    return []
 def quality_check(new_text):
     words = new_text.split()
     vectorizer = TfidfVectorizer().fit_transform([content] + los)
     similarities = cosine_similarity(vectorizer[0:1], vectorizer[1:]).flatten()
     matched = []
+    scores_old = [round(np.random.uniform(1, 3), 1) for _ in los]
     scores_new = []
     for i, score in enumerate(similarities):
         if score > 0.2:
             matched.append(f"✓ {los[i]} (Match: {score:.2f})")
+        scores_new.append(round(score * 5, 1))  # normalize to 5
     return matched, len(matched), scores_old, scores_new
     fig.tight_layout()
     return fig
+def compare_handouts(old_pdf_bytes, new_pdf_bytes, lo_file_bytes, lo_filename):
+    old_text = extract_text_from_pdf(old_pdf_bytes)
+    new_text = extract_text_from_pdf(new_pdf_bytes)
+    los = extract_los(lo_file_bytes, lo_filename)
     if not old_text or not new_text:
+        return "❗ Error in file(s)", "", "", None
     added_summary, added_lines, total_lines = summarize_added_lines(old_text, new_text)
     percent_change = (added_lines / max(total_lines, 1)) * 100
     return summary_output, lo_output, stats, chart
 iface = gr.Interface(
+    fn=lambda old_pdf, new_pdf, lo_file: compare_handouts(old_pdf, new_pdf, lo_file, lo_file.name),
     inputs=[
         gr.File(label="📤 Old Handout PDF", type="binary"),
         gr.File(label="📥 New Handout PDF", type="binary"),
         gr.Textbox(label="📊 Stats & Quality", lines=5),
         gr.Plot(label="📉 LO Match Score Chart")
     ],
+    title="📘 Handout Comparator (Binary Safe)",
+    description="Upload old/new handouts + LO file. Detects changes, LO match, and generates update chart."
 )
 iface.launch()