Deevyankar committed on
Commit
4c891c6
·
verified ·
1 Parent(s): 3c108e3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -21
app.py CHANGED
@@ -1,13 +1,11 @@
1
 
2
-
3
  import gradio as gr
4
- import fitz # PyMuPDF
5
  from sentence_transformers import SentenceTransformer, util
6
  import matplotlib.pyplot as plt
 
7
  import numpy as np
8
- import os
9
 
10
- # Load transformer model once
11
  model = SentenceTransformer("all-MiniLM-L6-v2")
12
 
13
  def extract_text_pdf(file_obj):
@@ -17,7 +15,7 @@ def extract_text_pdf(file_obj):
17
  for page in doc:
18
  text += page.get_text()
19
  return text if text.strip() else None
20
- except Exception as e:
21
  return None
22
 
23
  def semantic_similarity(text1, text2):
@@ -25,40 +23,77 @@ def semantic_similarity(text1, text2):
25
  emb2 = model.encode([text2], convert_to_tensor=True)
26
  return float(util.pytorch_cos_sim(emb1, emb2)[0][0])
27
 
28
- def compare_docs(old_pdf, new_pdf):
 
 
 
 
 
 
 
 
29
  old_text = extract_text_pdf(old_pdf)
30
  new_text = extract_text_pdf(new_pdf)
31
 
32
  if not old_text or not new_text:
33
- return "❌ Could not extract text from one or both PDFs.", None
34
 
 
35
  sim_score = semantic_similarity(old_text, new_text)
36
  change_percent = round((1 - sim_score) * 100, 2)
37
-
38
- summary = f"📈 Estimated Content Change: {change_percent}%\n\n"
39
- summary += "🧠 Semantic Similarity Score: {:.2f}\n".format(sim_score)
40
 
41
  if change_percent < 10:
42
- summary += "✅ Minor updates detected, mostly similar content."
43
  elif change_percent < 40:
44
- summary += "🔄 Moderate content updates detected."
45
  else:
46
- summary += "🆕 Major revisions and new content identified."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
- return summary, None
49
 
50
  iface = gr.Interface(
51
- fn=compare_docs,
52
  inputs=[
53
- gr.File(label="Upload Old Handout (PDF)", file_types=[".pdf"]),
54
- gr.File(label="Upload New Handout (PDF)", file_types=[".pdf"])
 
55
  ],
56
  outputs=[
57
- gr.Textbox(label="Comparison Summary"),
58
- gr.Plot(label="(Coming Soon) Visual Summary")
 
59
  ],
60
- title="📘 Course Handout Comparator with Semantic AI",
61
- description="Upload old and new PDFs to see how much content has changed. Uses transformer model for expert-like judgment.",
62
  )
63
 
64
  iface.launch()
 
1
 
 
2
  import gradio as gr
3
+ import fitz
4
  from sentence_transformers import SentenceTransformer, util
5
  import matplotlib.pyplot as plt
6
+ import pandas as pd
7
  import numpy as np
 
8
 
 
9
  model = SentenceTransformer("all-MiniLM-L6-v2")
10
 
11
  def extract_text_pdf(file_obj):
 
15
  for page in doc:
16
  text += page.get_text()
17
  return text if text.strip() else None
18
+ except:
19
  return None
20
 
21
  def semantic_similarity(text1, text2):
 
23
  emb2 = model.encode([text2], convert_to_tensor=True)
24
  return float(util.pytorch_cos_sim(emb1, emb2)[0][0])
25
 
26
+ def compare_with_los(text, lo_list):
27
+ scores = []
28
+ for lo in lo_list:
29
+ score = util.cos_sim(model.encode(lo, convert_to_tensor=True),
30
+ model.encode(text, convert_to_tensor=True))[0][0].item()
31
+ scores.append(round(score * 100, 2))
32
+ return scores
33
+
34
+ def compare_all(old_pdf, new_pdf, lo_file):
35
  old_text = extract_text_pdf(old_pdf)
36
  new_text = extract_text_pdf(new_pdf)
37
 
38
  if not old_text or not new_text:
39
+ return "❌ Could not extract text from one or both PDFs.", None, None
40
 
41
+ # Overall semantic similarity
42
  sim_score = semantic_similarity(old_text, new_text)
43
  change_percent = round((1 - sim_score) * 100, 2)
44
+ summary = f"📈 Content Change: {change_percent}%\n🧠 Similarity Score: {sim_score:.2f}\n\n"
 
 
45
 
46
  if change_percent < 10:
47
+ summary += "✅ Minor content update."
48
  elif change_percent < 40:
49
+ summary += "🔄 Moderate update."
50
  else:
51
+ summary += "🆕 Significant changes detected."
52
+
53
+ # LO comparison
54
+ los = lo_file.read().decode("utf-8").splitlines()
55
+ old_scores = compare_with_los(old_text, los)
56
+ new_scores = compare_with_los(new_text, los)
57
+ score_diff = [round(new - old, 2) for old, new in zip(old_scores, new_scores)]
58
+
59
+ df = pd.DataFrame({
60
+ "Learning Outcome": los,
61
+ "Old Match (%)": old_scores,
62
+ "New Match (%)": new_scores,
63
+ "Change (%)": score_diff
64
+ })
65
+ table_html = df.to_html(index=False)
66
+
67
+ # Bar chart
68
+ fig, ax = plt.subplots(figsize=(10, 4))
69
+ index = np.arange(len(los))
70
+ bar_width = 0.35
71
+ ax.bar(index, old_scores, bar_width, label='Old')
72
+ ax.bar(index + bar_width, new_scores, bar_width, label='New')
73
+ ax.set_xlabel('Learning Outcomes')
74
+ ax.set_ylabel('Match Score (%)')
75
+ ax.set_title('LO-wise Semantic Match')
76
+ ax.set_xticks(index + bar_width / 2)
77
+ ax.set_xticklabels([f"LO{i+1}" for i in range(len(los))], rotation=45)
78
+ ax.legend()
79
+ fig.tight_layout()
80
 
81
+ return summary, fig, table_html
82
 
83
  iface = gr.Interface(
84
+ fn=compare_all,
85
  inputs=[
86
+ gr.File(label="Old Handout (PDF)"),
87
+ gr.File(label="New Handout (PDF)"),
88
+ gr.File(label="Learning Outcomes (.txt)", file_types=[".txt"])
89
  ],
90
  outputs=[
91
+ gr.Textbox(label="Summary"),
92
+ gr.Plot(label="LO-wise Bar Chart"),
93
+ gr.HTML(label="LO-wise Comparison Table")
94
  ],
95
+ title="📘 Semantic Handout Comparator with LO Alignment",
96
+ description="Compare course handouts for overall change and LO alignment using transformer models."
97
  )
98
 
99
  iface.launch()