Deevyankar committed on
Commit
0a99a54
·
verified ·
1 Parent(s): e17d93e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -35
app.py CHANGED
@@ -1,22 +1,31 @@
1
- # app.py
2
  import gradio as gr
 
3
  import difflib
4
- from docx import Document
5
- import os
6
-
7
- def extract_text(file):
8
- ext = os.path.splitext(file.name)[1]
9
- if ext == ".txt":
10
- return file.read().decode("utf-8")
11
- elif ext == ".docx":
12
- doc = Document(file)
13
- return "\n".join([para.text for para in doc.paragraphs])
 
 
 
 
 
 
 
 
14
  else:
15
- return "Unsupported file type. Please upload a .txt or .docx file."
16
 
17
- def compare_documents(old_file, new_file):
18
- old_text = extract_text(old_file)
19
- new_text = extract_text(new_file)
 
20
 
21
  old_lines = old_text.splitlines()
22
  new_lines = new_text.splitlines()
@@ -24,24 +33,20 @@ def compare_documents(old_file, new_file):
24
  diff = list(difflib.unified_diff(old_lines, new_lines))
25
  added = [line for line in diff if line.startswith('+') and not line.startswith('+++')]
26
  removed = [line for line in diff if line.startswith('-') and not line.startswith('---')]
27
-
28
  percent_change = (len(added) + len(removed)) / max(len(old_lines), 1) * 100
29
- summary = f"📈 Updated Content: {percent_change:.2f}%\n\n"
30
- summary += f"🔼 Added: {len(added)} lines\n🔽 Removed: {len(removed)} lines\n\n"
31
- preview = "\n".join(diff[:100]) or "No differences found."
32
-
33
- return summary + preview
34
-
35
- # Gradio Interface
36
- iface = gr.Interface(
37
- fn=compare_documents,
38
- inputs=[
39
- gr.File(label="Upload Old Document (.txt or .docx)"),
40
- gr.File(label="Upload New Document (.txt or .docx)")
41
- ],
42
- outputs="text",
43
- title="📄 Document Version Comparator",
44
- description="Upload two versions of a document to compare updates and get change percentage. Supports .txt and .docx files."
45
- )
46
-
47
- iface.launch()
 
 
1
  import gradio as gr
2
+ import fitz # PyMuPDF
3
  import difflib
4
+ from sentence_transformers import SentenceTransformer, util
5
+
6
+ model = SentenceTransformer('all-MiniLM-L6-v2')
7
+
8
+ def extract_text_from_pdf(pdf_file):
9
+ doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
10
+ full_text = ""
11
+ for page in doc:
12
+ full_text += page.get_text()
13
+ return full_text
14
+
15
+ def extract_los(lo_file):
16
+ if lo_file.name.endswith('.txt'):
17
+ return lo_file.read().decode('utf-8').splitlines()
18
+ elif lo_file.name.endswith('.docx'):
19
+ from docx import Document
20
+ doc = Document(lo_file)
21
+ return [para.text for para in doc.paragraphs if para.text.strip()]
22
  else:
23
+ return []
24
 
25
+ def compare_and_assess(old_pdf, new_pdf, lo_file):
26
+ # Compare PDFs
27
+ old_text = extract_text_from_pdf(old_pdf)
28
+ new_text = extract_text_from_pdf(new_pdf)
29
 
30
  old_lines = old_text.splitlines()
31
  new_lines = new_text.splitlines()
 
33
  diff = list(difflib.unified_diff(old_lines, new_lines))
34
  added = [line for line in diff if line.startswith('+') and not line.startswith('+++')]
35
  removed = [line for line in diff if line.startswith('-') and not line.startswith('---')]
 
36
  percent_change = (len(added) + len(removed)) / max(len(old_lines), 1) * 100
37
+
38
+ # LO analysis
39
+ los = extract_los(lo_file)
40
+ new_emb = model.encode(new_text, convert_to_tensor=True)
41
+ lo_scores = []
42
+ for lo in los:
43
+ lo_emb = model.encode(lo, convert_to_tensor=True)
44
+ score = util.cos_sim(new_emb, lo_emb).max().item()
45
+ lo_scores.append(f"• {lo[:80]}: {score*100:.1f}% relevance")
46
+
47
+ # Format Output
48
+ summary = f"📈 Content Updated: {percent_change:.2f}%\n"
49
+ summary += f"🔼 Added Lines: {len(added)} | 🔽 Removed Lines: {len(removed)}\n\n"
50
+ summary += "🎯 Learning Outcome Coverage:\n" + "\n".join(lo_scores[:10])
51
+ return summary
52
+