Deevyankar committed on
Commit
39ec5fb
·
verified ·
1 Parent(s): 843f763

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -34
app.py CHANGED
@@ -3,12 +3,12 @@ import fitz # PyMuPDF
3
  import difflib
4
  from sentence_transformers import SentenceTransformer, util
5
  from docx import Document
 
6
 
7
- # Load the AI model for semantic similarity
8
  model = SentenceTransformer('all-MiniLM-L6-v2')
9
 
10
-
11
- # Extract raw text from PDF
12
  def extract_text_from_pdf(pdf_file):
13
  doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
14
  full_text = ""
@@ -16,8 +16,7 @@ def extract_text_from_pdf(pdf_file):
16
  full_text += page.get_text()
17
  return full_text
18
 
19
- import io
20
-
21
  def extract_los(lo_file):
22
  if lo_file.name.endswith('.txt'):
23
  return lo_file.read().decode('utf-8').splitlines()
@@ -28,54 +27,51 @@ def extract_los(lo_file):
28
  else:
29
  return []
30
 
31
- """"# Extract lines from uploaded LO file (.txt or .docx)
32
- def extract_los(lo_file):
33
- if lo_file.name.endswith('.txt'):
34
- return lo_file.read().decode('utf-8').splitlines()
35
- elif lo_file.name.endswith('.docx'):
36
- doc = Document(lo_file)
37
- return [para.text.strip() for para in doc.paragraphs if para.text.strip()]
38
- else:
39
- return []"""
40
-
41
-
42
- # Main function to compare PDFs and assess LO coverage
43
  def compare_and_assess(old_pdf, new_pdf, lo_file):
44
- # Extract content
 
 
 
45
  old_text = extract_text_from_pdf(old_pdf)
46
  new_text = extract_text_from_pdf(new_pdf)
47
 
48
- # Compare versions
 
 
 
49
  old_lines = old_text.splitlines()
50
  new_lines = new_text.splitlines()
51
-
52
  diff = list(difflib.unified_diff(old_lines, new_lines))
 
53
  added = [line for line in diff if line.startswith('+') and not line.startswith('+++')]
54
  removed = [line for line in diff if line.startswith('-') and not line.startswith('---')]
55
  percent_change = (len(added) + len(removed)) / max(len(old_lines), 1) * 100
56
 
57
- # Learning Outcome Analysis
58
  los = extract_los(lo_file)
 
 
 
 
59
  lo_scores = []
60
- if los:
61
- new_emb = model.encode(new_text, convert_to_tensor=True)
62
- for lo in los:
63
- lo_emb = model.encode(lo, convert_to_tensor=True)
64
- sim = util.cos_sim(new_emb, lo_emb).max().item()
65
- lo_scores.append(f"• {lo[:80]}: {sim*100:.1f}% relevant")
66
 
67
  # Output
68
  summary = f"📈 Content Updated: {percent_change:.2f}%\n"
69
  summary += f"🔼 Added Lines: {len(added)}\n🔽 Removed Lines: {len(removed)}\n\n"
70
- if lo_scores:
71
- summary += "🎯 Learning Outcome Coverage:\n" + "\n".join(lo_scores[:10])
72
- else:
73
- summary += "⚠️ No valid Learning Outcome file uploaded."
74
 
75
- return summary
 
 
76
 
 
77
 
78
- # Define Gradio interface
79
  iface = gr.Interface(
80
  fn=compare_and_assess,
81
  inputs=[
@@ -85,7 +81,8 @@ iface = gr.Interface(
85
  ],
86
  outputs="text",
87
  title="📚 Course Handout Comparator + LO Evaluator",
88
- description="Upload two PDF handouts (old + new) and a Learning Outcome file. The app compares content, calculates % updated, and checks how well the new handout meets your course learning outcomes."
89
  )
90
 
91
  iface.launch()
 
 
3
  import difflib
4
  from sentence_transformers import SentenceTransformer, util
5
  from docx import Document
6
+ import io
7
 
8
+ # Load the sentence-transformer model
9
  model = SentenceTransformer('all-MiniLM-L6-v2')
10
 
11
+ # Extract text from PDF using PyMuPDF
 
12
  def extract_text_from_pdf(pdf_file):
13
  doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
14
  full_text = ""
 
16
  full_text += page.get_text()
17
  return full_text
18
 
19
+ # Extract Learning Outcomes from .txt or .docx
 
20
  def extract_los(lo_file):
21
  if lo_file.name.endswith('.txt'):
22
  return lo_file.read().decode('utf-8').splitlines()
 
27
  else:
28
  return []
29
 
30
+ # Main app logic
 
 
 
 
 
 
 
 
 
 
 
31
  def compare_and_assess(old_pdf, new_pdf, lo_file):
32
+ if not old_pdf or not new_pdf or not lo_file:
33
+ return "❌ Please upload all three files."
34
+
35
+ # Extract text
36
  old_text = extract_text_from_pdf(old_pdf)
37
  new_text = extract_text_from_pdf(new_pdf)
38
 
39
+ if len(old_text.strip()) < 50 or len(new_text.strip()) < 50:
40
+ return "⚠️ One of the PDFs may be empty or unreadable."
41
+
42
+ # Diff analysis
43
  old_lines = old_text.splitlines()
44
  new_lines = new_text.splitlines()
 
45
  diff = list(difflib.unified_diff(old_lines, new_lines))
46
+
47
  added = [line for line in diff if line.startswith('+') and not line.startswith('+++')]
48
  removed = [line for line in diff if line.startswith('-') and not line.startswith('---')]
49
  percent_change = (len(added) + len(removed)) / max(len(old_lines), 1) * 100
50
 
51
+ # LO analysis
52
  los = extract_los(lo_file)
53
+ if not los:
54
+ return "⚠️ No valid Learning Outcomes found in the file."
55
+
56
+ new_emb = model.encode(new_text, convert_to_tensor=True)
57
  lo_scores = []
58
+ for lo in los:
59
+ lo_emb = model.encode(lo, convert_to_tensor=True)
60
+ sim = util.cos_sim(new_emb, lo_emb).max().item()
61
+ lo_scores.append(f"• {lo[:80]}: {sim*100:.1f}% relevant")
 
 
62
 
63
  # Output
64
  summary = f"📈 Content Updated: {percent_change:.2f}%\n"
65
  summary += f"🔼 Added Lines: {len(added)}\n🔽 Removed Lines: {len(removed)}\n\n"
66
+ summary += "🎯 Learning Outcome Coverage:\n" + "\n".join(lo_scores[:10])
 
 
 
67
 
68
+ # Debug logs (can be viewed in Hugging Face Logs tab)
69
+ print("✅ PDFs compared successfully.")
70
+ print("LOs evaluated:", len(lo_scores))
71
 
72
+ return summary
73
 
74
+ # Gradio interface
75
  iface = gr.Interface(
76
  fn=compare_and_assess,
77
  inputs=[
 
81
  ],
82
  outputs="text",
83
  title="📚 Course Handout Comparator + LO Evaluator",
84
+ description="Compare two PDF handouts (old + new) and a Learning Outcome file. Calculates % updated and checks how well the new content aligns with your course outcomes."
85
  )
86
 
87
  iface.launch()
88
+