Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
|
| 2 |
import gradio as gr
|
| 3 |
import fitz # PyMuPDF
|
| 4 |
import docx
|
|
@@ -7,6 +6,7 @@ import re
|
|
| 7 |
import os
|
| 8 |
import matplotlib.pyplot as plt
|
| 9 |
import numpy as np
|
|
|
|
| 10 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 11 |
from sentence_transformers import SentenceTransformer, util
|
| 12 |
from difflib import SequenceMatcher
|
|
@@ -62,11 +62,11 @@ def compare_handouts(old_pdf, new_pdf, lo_file):
|
|
| 62 |
new_text = extract_text_from_pdf(new_pdf)
|
| 63 |
|
| 64 |
if len(old_text.strip()) < 200 or len(new_text.strip()) < 200:
|
| 65 |
-
return "⚠️ Could not extract meaningful content from one or both PDFs.", None
|
| 66 |
|
| 67 |
lo_list = extract_text_from_docx(lo_file)
|
| 68 |
if not lo_list:
|
| 69 |
-
return "⚠️ No learning outcomes detected.", None
|
| 70 |
|
| 71 |
old_scores = semantic_match(lo_list, old_text)
|
| 72 |
new_scores = semantic_match(lo_list, new_text)
|
|
@@ -82,6 +82,16 @@ def compare_handouts(old_pdf, new_pdf, lo_file):
|
|
| 82 |
else:
|
| 83 |
summary += "⚠️ Summary: No significant improvement in LO alignment."
|
| 84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
# Plot
|
| 86 |
x = np.arange(len(lo_list))
|
| 87 |
width = 0.35
|
|
@@ -95,7 +105,7 @@ def compare_handouts(old_pdf, new_pdf, lo_file):
|
|
| 95 |
ax.legend()
|
| 96 |
plt.tight_layout()
|
| 97 |
|
| 98 |
-
return summary, fig
|
| 99 |
|
| 100 |
with gr.Blocks() as demo:
|
| 101 |
gr.Markdown("π **Educational Content Comparator**")
|
|
@@ -112,8 +122,9 @@ with gr.Blocks() as demo:
|
|
| 112 |
|
| 113 |
output_text = gr.Textbox(label="π Summary", lines=5, interactive=False)
|
| 114 |
output_plot = gr.Plot(label="π LO Match Chart")
|
|
|
|
| 115 |
|
| 116 |
-
btn.click(fn=compare_handouts, inputs=[old_pdf, new_pdf, lo_file], outputs=[output_text, output_plot])
|
| 117 |
-
clear_btn.click(fn=lambda: ("", None), inputs=[], outputs=[output_text, output_plot])
|
| 118 |
|
| 119 |
demo.launch()
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import fitz # PyMuPDF
|
| 3 |
import docx
|
|
|
|
| 6 |
import os
|
| 7 |
import matplotlib.pyplot as plt
|
| 8 |
import numpy as np
|
| 9 |
+
import pandas as pd
|
| 10 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 11 |
from sentence_transformers import SentenceTransformer, util
|
| 12 |
from difflib import SequenceMatcher
|
|
|
|
| 62 |
new_text = extract_text_from_pdf(new_pdf)
|
| 63 |
|
| 64 |
if len(old_text.strip()) < 200 or len(new_text.strip()) < 200:
|
| 65 |
+
return "⚠️ Could not extract meaningful content from one or both PDFs.", None, None
|
| 66 |
|
| 67 |
lo_list = extract_text_from_docx(lo_file)
|
| 68 |
if not lo_list:
|
| 69 |
+
return "⚠️ No learning outcomes detected.", None, None
|
| 70 |
|
| 71 |
old_scores = semantic_match(lo_list, old_text)
|
| 72 |
new_scores = semantic_match(lo_list, new_text)
|
|
|
|
| 82 |
else:
|
| 83 |
summary += "⚠️ Summary: No significant improvement in LO alignment."
|
| 84 |
|
| 85 |
+
# Create comparison table
|
| 86 |
+
df = pd.DataFrame({
|
| 87 |
+
"Learning Outcome": [f"LO{i+1}" for i in range(len(lo_list))],
|
| 88 |
+
"Old Match Score": old_scores,
|
| 89 |
+
"New Match Score": new_scores,
|
| 90 |
+
"Improvement": np.array(new_scores) - np.array(old_scores)
|
| 91 |
+
})
|
| 92 |
+
excel_path = "/mnt/data/LO_Comparison_Report.xlsx"
|
| 93 |
+
df.to_excel(excel_path, index=False)
|
| 94 |
+
|
| 95 |
# Plot
|
| 96 |
x = np.arange(len(lo_list))
|
| 97 |
width = 0.35
|
|
|
|
| 105 |
ax.legend()
|
| 106 |
plt.tight_layout()
|
| 107 |
|
| 108 |
+
return summary, fig, excel_path
|
| 109 |
|
| 110 |
with gr.Blocks() as demo:
|
| 111 |
gr.Markdown("π **Educational Content Comparator**")
|
|
|
|
| 122 |
|
| 123 |
output_text = gr.Textbox(label="π Summary", lines=5, interactive=False)
|
| 124 |
output_plot = gr.Plot(label="π LO Match Chart")
|
| 125 |
+
output_excel = gr.File(label="π Download Excel Report")
|
| 126 |
|
| 127 |
+
btn.click(fn=compare_handouts, inputs=[old_pdf, new_pdf, lo_file], outputs=[output_text, output_plot, output_excel])
|
| 128 |
+
clear_btn.click(fn=lambda: ("", None, None), inputs=[], outputs=[output_text, output_plot, output_excel])
|
| 129 |
|
| 130 |
demo.launch()
|