Deevyankar committed on
Commit
1add604
·
verified ·
1 Parent(s): bf6da39

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -100
app.py DELETED
@@ -1,100 +0,0 @@
1
-
2
- import gradio as gr
3
- from PyPDF2 import PdfReader
4
- from sklearn.feature_extraction.text import TfidfVectorizer
5
- from sklearn.metrics.pairwise import cosine_similarity
6
- from sentence_transformers import SentenceTransformer, util
7
- import matplotlib.pyplot as plt
8
- import pandas as pd
9
- import io
10
-
11
# Sentence-embedding model shared by the whole module. It is instantiated
# once, at import time, and reused by every similarity computation below.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
model = SentenceTransformer(EMBEDDING_MODEL_NAME)
13
-
14
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page of a PDF given as bytes.

    Args:
        pdf_file: Raw PDF content as a bytes-like object (the Gradio file
            inputs in this module are declared with ``type="binary"``).

    Returns:
        The page texts joined in page order with leading/trailing whitespace
        stripped, or ``""`` when nothing was supplied, the PDF could not be
        parsed, or no page produced any text. This function never raises;
        callers use the empty string as the failure signal.
    """
    import logging

    if not pdf_file:
        # Nothing uploaded (None) or a zero-byte upload: skip parsing.
        return ""

    try:
        reader = PdfReader(io.BytesIO(pdf_file))
        page_texts = (page.extract_text() for page in reader.pages)
        # Pages whose extract_text() result is falsy contribute nothing.
        return "".join(text for text in page_texts if text).strip()
    except Exception:
        # Deliberately best-effort: any parsing problem maps to "". The
        # reason is logged rather than discarded so failures can be diagnosed.
        logging.getLogger(__name__).warning(
            "PDF text extraction failed", exc_info=True
        )
        return ""
25
-
26
def tfidf_similarity(text1, text2):
    """Return the cosine similarity of two texts in a shared TF-IDF space.

    A fresh ``TfidfVectorizer`` is fitted on exactly the two given texts, and
    the cosine similarity between the two resulting row vectors is returned.

    Args:
        text1: First document.
        text2: Second document.

    Returns:
        The similarity as a plain ``float``. ``0.0`` is returned when the
        vectorizer rejects the input with ``ValueError``.
    """
    vectorizer = TfidfVectorizer()
    try:
        tfidf = vectorizer.fit_transform([text1, text2])
    except ValueError:
        # scikit-learn raises ValueError ("empty vocabulary") when neither
        # text contains a token the default tokenizer accepts, e.g. text made
        # only of punctuation or single characters. No lexical overlap can be
        # measured in that case, so report zero instead of crashing.
        return 0.0
    # Row slices keep the operands 2-D, as cosine_similarity expects; the
    # single cell of the 1x1 result is the score.
    return float(cosine_similarity(tfidf[0:1], tfidf[1:2])[0][0])
30
-
31
def transformer_similarity(text1, text2):
    """Score two texts by the cosine similarity of their embeddings.

    Each text is encoded separately with the module-level ``model`` (as
    tensors), and the pair is compared with ``util.pytorch_cos_sim``.

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        The single similarity value extracted with ``.item()``.
    """
    first_embedding, second_embedding = (
        model.encode(text, convert_to_tensor=True) for text in (text1, text2)
    )
    similarity = util.pytorch_cos_sim(first_embedding, second_embedding)
    return similarity.item()
35
-
36
- def compare_all(old_pdf, new_pdf, lo_file):
37
- try:
38
- los = lo_file.decode("utf-8", errors="ignore").splitlines()
39
- los = [lo.strip() for lo in los if lo.strip()]
40
- except:
41
- return "❌ Could not read learning outcomes file.", None, None, None, None, None
42
-
43
- old_text = extract_text_from_pdf(old_pdf)
44
- new_text = extract_text_from_pdf(new_pdf)
45
-
46
- if not old_text or not new_text:
47
- return "❌ Could not extract text from one or both PDFs.", None, None, None, None, None
48
-
49
- tfidf_sim = tfidf_similarity(old_text, new_text)
50
- transformer_sim = transformer_similarity(old_text, new_text)
51
- content_diff = abs(len(new_text) - len(old_text)) / max(len(old_text), 1) * 100
52
-
53
- tfidf_summary = f"πŸ” **TF-IDF Similarity:** {round(tfidf_sim * 100, 2)}%"
54
- trans_summary = f"πŸ€– **Transformer Similarity:** {round(transformer_sim * 100, 2)}%"
55
- length_change = f"πŸ“„ **Text Length Difference:** {round(content_diff, 2)}%"
56
-
57
- insights = f"{tfidf_summary}\n{trans_summary}\n{length_change}\n"
58
-
59
- # LO-wise comparison
60
- lo_scores = []
61
- for lo in los:
62
- lo_score = transformer_similarity(lo, new_text)
63
- lo_scores.append(lo_score)
64
-
65
- labels = [f"LO{i+1}" for i in range(len(los))]
66
- df = pd.DataFrame({
67
- "Learning Outcome": labels,
68
- "Match Score (0-1)": [round(s, 2) for s in lo_scores]
69
- })
70
-
71
- # Chart
72
- fig, ax = plt.subplots(figsize=(8, 4))
73
- ax.bar(labels, lo_scores, color="skyblue")
74
- ax.set_ylim(0, 1)
75
- ax.set_ylabel("Semantic Match")
76
- ax.set_title("LO-wise Transformer Similarity")
77
- plt.xticks(rotation=45)
78
-
79
- return insights, df, fig, new_text, tfidf_sim, transformer_sim
80
-
81
# Gradio UI for compare_all: three binary file uploads in, six result
# components out. The order of `outputs` must match the order of the tuple
# returned by compare_all (summary, table, chart, new text, two raw scores).
# NOTE(review): the description mentions "Bloom's mapping", but nothing in
# this file computes one - confirm whether the wording should change.
iface = gr.Interface(
    fn=compare_all,
    inputs=[
        gr.File(label="Old Handout PDF", type="binary"),
        gr.File(label="New Handout PDF", type="binary"),
        gr.File(label="Learning Outcomes (TXT)", type="binary"),
    ],
    outputs=[
        gr.Textbox(label="📘 Summary of Analysis"),
        gr.Dataframe(label="📊 LO-wise Semantic Comparison"),
        gr.Plot(label="📈 LO Match Chart"),
        gr.Textbox(label="📖 New Handout Preview (Full Text)", lines=10, max_lines=20),
        gr.Number(label="TF-IDF Similarity Score"),
        gr.Number(label="Transformer Similarity Score"),
    ],
    title="📘 Course Handout Comparison Tool",
    description="Compare old and new handouts, analyze semantic change, LO alignment, and visualize Bloom's mapping.",
)

# Start the server only when this file is run as a script, so importing the
# module (e.g. from tests or tooling) does not launch the app.
if __name__ == "__main__":
    iface.launch()