Deevyankar committed on
Commit
89ad98d
·
verified ·
1 Parent(s): 357d8c6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -0
app.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import gradio as gr
3
+ import fitz # PyMuPDF
4
+ from docx import Document
5
+ import io
6
+ import difflib
7
+ from sklearn.feature_extraction.text import TfidfVectorizer
8
+ from sklearn.metrics.pairwise import cosine_similarity
9
+
10
def extract_text_from_pdf(uploaded_file):
    """Extract the text of every non-blank page of a PDF.

    Args:
        uploaded_file: Raw PDF bytes, an object with a ``read()`` method that
            returns them, or ``None`` when nothing was uploaded.

    Returns:
        The page texts joined by newlines, with surrounding whitespace
        stripped. An empty string when ``uploaded_file`` is ``None`` or no
        page contains text. If extraction fails, a string starting with
        ``"Error extracting text:"`` is returned instead of raising.
    """
    # A missing upload is "no text", not an extraction error; returning ""
    # lets callers' emptiness checks handle it.
    if uploaded_file is None:
        return ""

    try:
        if isinstance(uploaded_file, (bytes, bytearray)):
            file_bytes = uploaded_file
        else:
            file_bytes = uploaded_file.read()

        # The context manager closes the document even if a page fails.
        with fitz.open(stream=file_bytes, filetype="pdf") as doc:
            page_texts = [page.get_text() for page in doc]

        non_blank = [page_text for page_text in page_texts if page_text.strip()]
        return "\n".join(non_blank).strip()
    except Exception as e:
        # Best-effort contract kept from the original: report, don't raise.
        return f"Error extracting text: {str(e)}"
22
+
23
def extract_los(lo_file):
    """Load learning outcomes (LOs) from an uploaded ``.txt`` or ``.docx`` file.

    Args:
        lo_file: Raw file bytes, an object with ``read()`` (and optionally a
            ``name`` attribute), or ``None`` when nothing was uploaded.

    Returns:
        A list of non-empty, stripped lines (txt) or paragraphs (docx).
        An empty list for ``None`` or an unsupported extension. If loading
        fails, a one-element list whose item starts with
        ``"Error loading LOs:"`` is returned instead of raising.
    """
    if lo_file is None:
        return []

    try:
        if isinstance(lo_file, (bytes, bytearray)):
            file_bytes = lo_file
        else:
            file_bytes = lo_file.read()

        name = getattr(lo_file, "name", "") or ""
        if "." in name:
            ext = name.lower().rsplit(".", 1)[-1]
        else:
            # Raw bytes carry no filename, so sniff the content instead of
            # assuming docx: a .docx is a ZIP archive and starts with "PK".
            ext = "docx" if bytes(file_bytes[:4]) == b"PK\x03\x04" else "txt"

        if ext == "txt":
            # utf-8-sig also accepts plain UTF-8 and drops a leading BOM.
            lines = bytes(file_bytes).decode("utf-8-sig").splitlines()
            return [line.strip() for line in lines if line.strip()]

        if ext == "docx":
            doc = Document(io.BytesIO(bytes(file_bytes)))
            return [p.text.strip() for p in doc.paragraphs if p.text.strip()]

        return []
    except Exception as e:
        # Best-effort contract kept from the original: report, don't raise.
        return [f"Error loading LOs: {str(e)}"]
39
+
40
def calculate_similarity(text, los):
    """Score how closely ``text`` matches the learning outcomes, in percent.

    The outcomes are joined into one document and compared with ``text``
    using the cosine similarity of their TF-IDF vectors.

    Args:
        text: The document text to score.
        los: A list of learning-outcome strings.

    Returns:
        A float: the cosine similarity multiplied by 100. ``0.0`` when there
        are no outcomes, when either side is blank, or when no usable terms
        can be extracted from the two texts.
    """
    if not los:
        return 0.0

    combined_los = " ".join(los)
    # Blank input has nothing to compare; skip vectorisation entirely.
    if not text or not text.strip() or not combined_los.strip():
        return 0.0

    try:
        tfidf_matrix = TfidfVectorizer().fit_transform([text, combined_los])
    except ValueError:
        # Raised when the vocabulary is empty (e.g. only punctuation or
        # single characters); treat as "no similarity" instead of crashing.
        return 0.0

    score = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]
    # Convert the numpy scalar to a plain Python float for callers.
    return float(score) * 100
47
+
48
def quality_check(new_text, min_words=300):
    """Return a coarse quality statement based on the word count of the text.

    Args:
        new_text: The text to assess; words are whitespace-separated tokens.
        min_words: The text counts as "improved" only when it has strictly
            more than this many words. Defaults to 300, the threshold that
            was previously hard-coded.

    Returns:
        One of two fixed status strings ("seems improved" or
        "needs enhancement").
    """
    word_count = len(new_text.split())
    if word_count > min_words:
        return "🟒 Content quality seems improved."
    return "🟑 Content quality needs enhancement."
51
+
52
def visual_diff(old_text, new_text, *, fromfile="", tofile="", context=3):
    """Return a unified diff of two texts as one newline-joined string.

    Args:
        old_text: The original text.
        new_text: The revised text.
        fromfile: Label shown on the ``---`` header line (empty by default).
        tofile: Label shown on the ``+++`` header line (empty by default).
        context: Number of unchanged context lines around each change.

    Returns:
        The unified diff, or an empty string when the texts have the same
        lines. With the default keyword values the output is the same as
        before these options were added.
    """
    diff_lines = difflib.unified_diff(
        old_text.splitlines(),
        new_text.splitlines(),
        fromfile=fromfile,
        tofile=tofile,
        n=context,
        # Lines come from splitlines(), so they carry no terminators; an
        # empty lineterm keeps the header lines consistent with that.
        lineterm="",
    )
    return "\n".join(diff_lines)
58
+
59
def compare_handouts(old_pdf, new_pdf, lo_file):
    """Compare two handout PDFs and relate the new one to learning outcomes.

    Args:
        old_pdf: The previous handout, passed to ``extract_text_from_pdf``.
        new_pdf: The revised handout, passed to ``extract_text_from_pdf``.
        lo_file: The learning-outcomes file, passed to ``extract_los``.

    Returns:
        A 4-tuple of strings: the change summary, the learning-outcome list,
        the LO similarity line, and the quality statement followed by the
        unified diff. When a PDF cannot be read or has no text, the first
        item carries the message and the other three are empty.
    """
    # The extractors report failure by returning text with these prefixes
    # rather than raising, so failures must be recognised here; otherwise
    # the error message itself would be diffed and scored.
    pdf_error_prefix = "Error extracting text:"
    lo_error_prefix = "Error loading LOs:"

    old_text = extract_text_from_pdf(old_pdf)
    new_text = extract_text_from_pdf(new_pdf)
    los = extract_los(lo_file)

    extraction_errors = [
        text for text in (old_text, new_text)
        if text and text.startswith(pdf_error_prefix)
    ]
    if extraction_errors:
        return "\n".join(extraction_errors), "", "", ""

    if not old_text or not new_text:
        return "❗ One or both PDFs may not contain extractable text.", "", "", ""

    # A failed LO load is shown to the user but not treated as an outcome.
    lo_error = ""
    if len(los) == 1 and los[0].startswith(lo_error_prefix):
        lo_error, los = los[0], []

    # Line-level change statistics over the sets of distinct lines.
    old_lines = set(old_text.splitlines())
    new_lines = set(new_text.splitlines())
    added = new_lines - old_lines
    removed = old_lines - new_lines
    total_lines = max(len(old_lines | new_lines), 1)  # avoid division by zero
    change_percent = ((len(added) + len(removed)) / total_lines) * 100

    similarity_score = calculate_similarity(new_text, los)
    quality_statement = quality_check(new_text)
    diff_output = visual_diff(old_text, new_text)

    summary = f"πŸ” **Change Summary:**\n- Added lines: {len(added)}\n- Removed lines: {len(removed)}\n- Change %: {change_percent:.2f}%"
    if lo_error:
        lo_output = lo_error
    elif los:
        lo_output = "\n".join([f"β€’ {lo}" for lo in los])
    else:
        lo_output = "No learning outcomes detected."
    sim_output = f"πŸ“ **LO Similarity Score:** {similarity_score:.2f}%"

    return summary, lo_output, sim_output, quality_statement + "\n\nπŸ“„ Visual Diff:\n" + diff_output
84
+
85
# Upload widgets, in the positional order compare_handouts expects:
# old PDF, new PDF, learning-outcomes file. All are requested as binary.
_handout_inputs = [
    gr.File(label="πŸ“€ Old Handout PDF", type="binary"),
    gr.File(label="πŸ“₯ New Handout PDF", type="binary"),
    gr.File(label="πŸ“š Learning Outcomes (.docx or .txt)", type="binary"),
]

# One text box per element of the 4-tuple returned by compare_handouts.
_report_outputs = [
    gr.Textbox(label="🧾 Change Summary", lines=4),
    gr.Textbox(label="🎯 Learning Outcomes", lines=6),
    gr.Textbox(label="πŸ“ LO Semantic Similarity", lines=2),
    gr.Textbox(label="πŸ“Š Visual Diff + Content Quality", lines=20),
]

# Wire the comparison function to the widgets above.
iface = gr.Interface(
    fn=compare_handouts,
    inputs=_handout_inputs,
    outputs=_report_outputs,
    title="πŸ“Š Smart Handout Comparator with LO Matching & Quality Insights",
    description="Upload PDFs and LO file to analyze update percentage, learning outcome alignment, and content improvement insights. Now with visual diff!",
)

# Start the web UI as soon as the module is executed.
iface.launch()