fizzarif7 committed on
Commit
276b4e8
·
verified ·
1 Parent(s): be92f7d

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -178
app.py DELETED
@@ -1,178 +0,0 @@
1
- import gradio as gr
2
- from transformers import pipeline
3
- import pdfplumber
4
- import docx
5
- from PIL import Image
6
- import pytesseract
7
- from textblob import TextBlob
8
- import re
9
- import fitz
10
- import os
11
-
12
- # ------------------------
13
- # Hugging Face Model
14
- # ------------------------
15
# Zero-shot classifier shared by every verification request. It is built once
# at import time so the model is not reloaded on each call.
classifier = pipeline(
    task="zero-shot-classification",
    model="facebook/bart-large-mnli",
)
16
-
17
- # ------------------------
18
- # Extraction Functions
19
- # ------------------------
20
def extract_text_from_pdf(file_path):
    """Extract text from a PDF, falling back to OCR when no text is found.

    The text layer is read with pdfplumber first. If that yields nothing but
    whitespace, every page is rendered with PyMuPDF (``fitz``) and passed
    through Tesseract instead.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        The extracted text with one trailing newline per page, stripped of
        leading/trailing whitespace. Empty string if nothing could be read.
    """
    with pdfplumber.open(file_path) as pdf:
        page_texts = [page.extract_text() for page in pdf.pages]
    # Pages without extractable text give a falsy result; skip them.
    text = "".join(f"{page_text}\n" for page_text in page_texts if page_text)

    if not text.strip():  # OCR fallback
        ocr_pages = []
        # The context manager closes the PyMuPDF document once rendering is
        # done; the previous version left the handle open.
        with fitz.open(file_path) as doc:
            for page in doc:
                pix = page.get_pixmap()
                img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
                ocr_pages.append(pytesseract.image_to_string(img))
        text = "".join(f"{page_text}\n" for page_text in ocr_pages)
    return text.strip()
38
-
39
def extract_text_from_docx(file_path):
    """Return the paragraph text of a .docx file, one paragraph per line.

    Only ``Document.paragraphs`` is read, joined with newlines and stripped of
    surrounding whitespace.
    """
    document = docx.Document(file_path)
    paragraph_texts = (para.text for para in document.paragraphs)
    return "\n".join(paragraph_texts).strip()
42
-
43
def extract_text_from_image(file_path):
    """Run Tesseract OCR on an image file and return the recognised text.

    Args:
        file_path: Path of the image to read.

    Returns:
        The OCR output stripped of leading/trailing whitespace.
    """
    # Open inside a context manager so the underlying file handle is released
    # as soon as OCR finishes instead of lingering until garbage collection.
    with Image.open(file_path) as img:
        return pytesseract.image_to_string(img).strip()
45
-
46
def check_grammar(text):
    """Report whether TextBlob's corrector would change *text*.

    Despite the name, this only compares the input against the output of
    ``TextBlob.correct()``; any difference counts as an issue.

    Returns:
        True when the corrected text differs from the input, else False.
    """
    suggestion = TextBlob(text).correct()
    return str(suggestion) != text
50
-
51
def extract_dates(text):
    """Find date-like substrings in *text*.

    Recognised shapes are ``d/m/y`` or ``d-m-y``, ``d.m.y``, ``12th March 2024``
    style and ``March 12, 2024`` style. The last two patterns accept any word
    where a month name is expected, so they can produce false positives.

    Args:
        text: The text to scan.

    Returns:
        The unique matched strings, ordered by where each first appears in
        *text*. The order is deterministic because callers treat the first
        entry as the fallback issue date; the previous ``list(set(...))``
        returned an arbitrary, hash-dependent order.
    """
    date_patterns = [
        r'\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b',
        r'\b\d{1,2}\.\d{1,2}\.\d{2,4}\b',
        r'\b\d{1,2}(?:st|nd|rd|th)?\s+\w+\s*,?\s*\d{2,4}\b',
        r'\b\w+\s+\d{1,2},\s*\d{4}\b',
    ]
    hits = []
    for pattern in date_patterns:
        hits.extend(
            (match.start(), match.group(0))
            for match in re.finditer(pattern, text, flags=re.IGNORECASE)
        )
    # Sort by position so results follow document order, then drop duplicates
    # while keeping the first occurrence (dicts preserve insertion order).
    hits.sort()
    return list(dict.fromkeys(found for _, found in hits))
63
-
64
def classify_dates(text, dates):
    """Split *dates* into issue dates and event dates using nearby keywords.

    For each date, the first case-insensitive occurrence in *text* is located
    and a window of 60 characters on either side of its start is inspected.
    Issue keywords take precedence over event keywords. Event dates are
    reported together with the rest of their line (within 80 characters of
    the date's start). Dates not found in *text*, or with no keyword nearby,
    are left unclassified.

    Args:
        text: The full document text.
        dates: Date strings previously extracted from *text*.

    Returns:
        A ``(issue_dates, event_dates)`` tuple of lists. When no issue date
        was identified and *dates* is non-empty, ``dates[0]`` is used as the
        issue date.
    """
    issue_keywords = ["issued on", "dated", "notified on", "circular no"]
    event_keywords = ["holiday", "observed on", "exam on", "will be held on", "effective from"]

    lowered = text.lower()
    issue_dates = []
    event_dates = []
    for date_str in dates:
        pos = lowered.find(date_str.lower())
        if pos == -1:
            continue

        window = text[max(0, pos - 60): pos + 60].lower()
        if any(keyword in window for keyword in issue_keywords):
            issue_dates.append(date_str)
            continue
        if not any(keyword in window for keyword in event_keywords):
            continue

        # Keep whatever follows the date on the same line as event context.
        tail = text[pos: pos + 80]
        found = re.search(rf"{re.escape(date_str)}[^\n]*", tail)
        event_dates.append(found.group().strip() if found else date_str)

    if dates and not issue_dates:
        issue_dates.append(dates[0])
    return issue_dates, event_dates
82
-
83
- # ------------------------
84
- # Verification Logic
85
- # ------------------------
86
def verify_text(text, source_type="TEXT"):
    """Build the plain-text evidence report for *text*.

    Runs the spelling check, date extraction/classification and the zero-shot
    classifier (on the first 1000 characters only), then formats the results.

    Args:
        text: The document text to analyse.
        source_type: Label shown on the report's "Source" line.

    Returns:
        The formatted report, or a short error report when *text* is blank.
    """
    if not text.strip():
        return "--- Evidence Report ---\n\n❌ No readable text provided."

    has_language_issue = check_grammar(text)
    found_dates = extract_dates(text)
    issue_dates, event_dates = classify_dates(text, found_dates)
    outcome = classifier(text[:1000], candidate_labels=["REAL", "FAKE"])

    sections = [
        "📄 Evidence Report\n\n",
        "🔎 Document Analysis\n\n",
        f"Source: {source_type}\n\n",
        "✅ Evidence Considered\n\n",
    ]
    if has_language_issue:
        sections.append("Minor grammar/spelling issues detected.\n\n")
    else:
        sections.append("No major grammar or spelling issues detected.\n\n")

    if issue_dates:
        sections.append(f"📌 Document Issue Date(s): {', '.join(issue_dates)}\n")
    if event_dates:
        sections.append(f"📌 Event/Holiday Date(s): {', '.join(event_dates)}\n")
    if not found_dates:
        sections.append("No specific dates were clearly detected.\n")

    # NOTE(review): this sentence is emitted regardless of the verdict below.
    sections.append("\nDocument formatting and tone resemble genuine notices.\n\n")
    sections.append("🏁 Classification Result\n\n")
    # The first label/score pair is reported as the verdict.
    sections.append(f"Verdict: {outcome['labels'][0]}\n")
    sections.append(f"Confidence: {outcome['scores'][0]:.2f}\n")
    return "".join(sections)
112
-
113
def verify_document(file):
    """Extract text from an uploaded file and produce its verification report.

    The file type is taken from the text after the last ``.`` in ``file.name``
    (lower-cased). PDF and DOCX uploads get a text preview truncated to 1000
    characters; PNG/JPG/JPEG uploads get the opened image as preview.

    Args:
        file: Uploaded file object exposing a ``name`` path attribute, or None.

    Returns:
        A ``(preview, report)`` tuple. ``preview`` is None when no file was
        given or the extension is unsupported.
    """
    if file is None:
        return None, "❌ Please upload a file."

    file_path = file.name
    ext = file_path.split('.')[-1].lower()
    if ext not in ("pdf", "docx", "png", "jpg", "jpeg"):
        return None, "Unsupported file type."

    if ext in ("png", "jpg", "jpeg"):
        text = extract_text_from_image(file_path)
        preview = Image.open(file_path)  # show image preview
    else:
        if ext == "pdf":
            text = extract_text_from_pdf(file_path)
        else:
            text = extract_text_from_docx(file_path)
        ellipsis = "..." if len(text) > 1000 else ""
        preview = text[:1000] + ellipsis
    return preview, verify_text(text, source_type=ext.upper())
130
-
131
def process_text_input(manual_text):
    """Verify text pasted directly into the UI.

    Returns:
        ``(manual_text, report)`` for non-blank input, otherwise
        ``(None, <prompt to paste text>)``.
    """
    if not manual_text.strip():
        return None, "❌ Please paste some text first."
    return manual_text, verify_text(manual_text, source_type="MANUAL TEXT")
135
-
136
- # ------------------------
137
- # Gradio UI
138
- # ------------------------
139
# Styling is attached through CSS classes rather than element ids: the same
# style is shared by several components, and DOM ids must be unique per page.
with gr.Blocks(theme=gr.themes.Soft(), css="""
    .report-box {background:#f9f9fb; border-radius:10px; padding:15px; box-shadow:0 2px 6px rgba(0,0,0,0.1);}
    .preview-box {background:#eef7ff; border-radius:10px; padding:15px; box-shadow:0 2px 6px rgba(0,0,0,0.1);}
""") as demo:
    gr.Markdown("## 📑 Document Authenticity Verifier")
    gr.Markdown("Choose an option below to verify your document:")

    with gr.Tabs():
        with gr.Tab("📂 Upload File"):
            file_input = gr.File(label="Upload Document", file_types=[".pdf", ".docx", ".png", ".jpg", ".jpeg"])

            # Use both preview options: one for text, one for image
            preview_text = gr.Textbox(label="📄 File/Text Preview", lines=10, elem_classes="preview-box")
            preview_image = gr.Image(label="🖼️ Image Preview", elem_classes="preview-box")

            report_box = gr.Textbox(label="Verification Report", lines=20, elem_classes="report-box")
            verify_btn_file = gr.Button("🔍 Verify Document")

            def handle_file(file):
                """Route the preview to the text box or the image box.

                Returns ``(text_preview, image_preview, report)``; whichever
                preview does not apply is None.
                """
                preview, report = verify_document(file)
                if isinstance(preview, Image.Image):  # image case
                    return None, preview, report
                return preview, None, report  # text case

            verify_btn_file.click(fn=handle_file, inputs=file_input, outputs=[preview_text, preview_image, report_box])

        with gr.Tab("📝 Paste Text"):
            text_input = gr.Textbox(label="Paste Notification Text", lines=10, placeholder="Paste text here...")
            preview_text2 = gr.Textbox(label="Text Preview", lines=10, elem_classes="preview-box")
            report_box_text = gr.Textbox(label="Verification Report", lines=20, elem_classes="report-box")
            verify_btn_text = gr.Button("🔍 Verify Text")

            verify_btn_text.click(fn=process_text_input, inputs=text_input, outputs=[preview_text2, report_box_text])

# ------------------------
# Launch
# ------------------------
if __name__ == "__main__":
    demo.launch()