Spaces:

Deevyankar
/

Handouts

Sleeping

App Files Files Community

Deevyankar committed on Sep 20, 2025

Commit

ace889b

verified ·

1 Parent(s): 89e559a

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -22

app.py CHANGED Viewed

@@ -7,34 +7,33 @@ from docx import Document
 import gradio as gr
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
-# --- Extract Text ---
-def extract_text_from_pdf(pdf_bytes):
     try:
-        reader = PdfReader(pdf_bytes)
         text = ""
         for page in reader.pages:
             text += page.extract_text() or ""
         return text.strip()
-    except:
         return ""
-def extract_text_from_docx(docx_file):
-    doc = Document(docx_file)
-    return "\n".join([para.text.strip() for para in doc.paragraphs if para.text.strip()])
-# --- Change Percentage ---
 def calculate_change_percentage(old_text, new_text):
     seqm = difflib.SequenceMatcher(None, old_text, new_text)
     return (1 - seqm.ratio()) * 100
-# --- Semantic Matching ---
 def semantic_match(lo_texts, content):
-    vectorizer = TfidfVectorizer().fit_transform([content] + lo_texts)
-    similarities = cosine_similarity(vectorizer[0:1], vectorizer[1:]).flatten()
-    return similarities
-# --- Summary Generation ---
 def generate_summary(change_pct, matched_los, total_los):
     msg = f"📈 Content Change: {change_pct:.2f}%\n🎯 Matched LOs: {matched_los} of {total_los}\n"
     if change_pct > 20:
@@ -45,7 +44,6 @@ def generate_summary(change_pct, matched_los, total_los):
         msg += "🟡 Very little or no update."
     return msg
-# --- Bar Chart Plot ---
 def plot_lo_chart(lo_labels, old_scores, new_scores):
     df = pd.DataFrame({'Old': old_scores, 'New': new_scores}, index=lo_labels)
     ax = df.plot(kind='bar', figsize=(10, 5), title="LO-wise Match Score: Old vs New")
@@ -55,7 +53,6 @@ def plot_lo_chart(lo_labels, old_scores, new_scores):
     plt.tight_layout()
     return plt.gcf()
-# --- Main Comparator ---
 def compare_handouts(old_pdf, new_pdf, lo_docx):
     old_text = extract_text_from_pdf(old_pdf)
     new_text = extract_text_from_pdf(new_pdf)
@@ -63,11 +60,11 @@ def compare_handouts(old_pdf, new_pdf, lo_docx):
     if not old_text or not new_text:
         return "❌ Could not extract text from one or both PDFs.", None
-    lo_text_raw = extract_text_from_docx(lo_docx)
-    lo_list = [lo for lo in lo_text_raw.split('\n') if lo.strip()]
     if not lo_list:
-        return "❌ No learning outcomes detected in uploaded file.", None
     old_scores = semantic_match(lo_list, old_text)
     new_scores = semantic_match(lo_list, new_text)
@@ -79,20 +76,19 @@ def compare_handouts(old_pdf, new_pdf, lo_docx):
     fig = plot_lo_chart([f"LO{i+1}" for i in range(len(lo_list))], old_scores, new_scores)
     return summary, fig
-# --- Gradio App ---
 demo = gr.Interface(
     fn=compare_handouts,
     inputs=[
         gr.File(label="Upload Old PDF", type="binary"),
         gr.File(label="Upload New PDF", type="binary"),
-        gr.File(label="Upload Learning Outcomes (.docx)", type="binary"),
     ],
     outputs=[
         gr.Textbox(label="📋 Summary"),
         gr.Plot(label="📊 LO Match Chart")
     ],
     title="📘 Educational Content Comparator",
-    description="Upload 2 handouts and LO file (.docx). Detect % update, alignment with learning outcomes, and get visual summary."
 )
-demo.launch(share=True)

 import gradio as gr
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
+import io
+def extract_text_from_pdf(pdf_binary):
     try:
+        reader = PdfReader(io.BytesIO(pdf_binary))
         text = ""
         for page in reader.pages:
             text += page.extract_text() or ""
         return text.strip()
+    except Exception as e:
         return ""
+def extract_text_from_docx(docx_binary):
+    try:
+        doc = Document(io.BytesIO(docx_binary))
+        return "\n".join([p.text.strip() for p in doc.paragraphs if p.text.strip()])
+    except Exception as e:
+        return ""
 def calculate_change_percentage(old_text, new_text):
     seqm = difflib.SequenceMatcher(None, old_text, new_text)
     return (1 - seqm.ratio()) * 100
 def semantic_match(lo_texts, content):
+    vectorizer = TfidfVectorizer().fit_transform([content] + lo_texts)
+    return cosine_similarity(vectorizer[0:1], vectorizer[1:]).flatten()
 def generate_summary(change_pct, matched_los, total_los):
     msg = f"📈 Content Change: {change_pct:.2f}%\n🎯 Matched LOs: {matched_los} of {total_los}\n"
     if change_pct > 20:
         msg += "🟡 Very little or no update."
     return msg
 def plot_lo_chart(lo_labels, old_scores, new_scores):
     df = pd.DataFrame({'Old': old_scores, 'New': new_scores}, index=lo_labels)
     ax = df.plot(kind='bar', figsize=(10, 5), title="LO-wise Match Score: Old vs New")
     plt.tight_layout()
     return plt.gcf()
 def compare_handouts(old_pdf, new_pdf, lo_docx):
     old_text = extract_text_from_pdf(old_pdf)
     new_text = extract_text_from_pdf(new_pdf)
     if not old_text or not new_text:
         return "❌ Could not extract text from one or both PDFs.", None
+    lo_raw_text = extract_text_from_docx(lo_docx)
+    lo_list = [line for line in lo_raw_text.split("\n") if line.strip()]
     if not lo_list:
+        return "❌ No learning outcomes detected.", None
     old_scores = semantic_match(lo_list, old_text)
     new_scores = semantic_match(lo_list, new_text)
     fig = plot_lo_chart([f"LO{i+1}" for i in range(len(lo_list))], old_scores, new_scores)
     return summary, fig
 demo = gr.Interface(
     fn=compare_handouts,
     inputs=[
         gr.File(label="Upload Old PDF", type="binary"),
         gr.File(label="Upload New PDF", type="binary"),
+        gr.File(label="Upload Learning Outcomes (.docx)", type="binary")
     ],
     outputs=[
         gr.Textbox(label="📋 Summary"),
         gr.Plot(label="📊 LO Match Chart")
     ],
     title="📘 Educational Content Comparator",
+    description="Upload 2 handouts and a .docx file of Learning Outcomes to compare changes and alignment."
 )
+demo.launch(share=True)