GopalKrushnaMahapatra committed on
Commit
d070828
·
verified ·
1 Parent(s): a9d08a9

Update pdf_reports.py

Browse files
Files changed (1) hide show
  1. pdf_reports.py +187 -177
pdf_reports.py CHANGED
@@ -1,186 +1,196 @@
1
- # pdf_reports.py
2
  import os
3
- import uuid
4
- from datetime import datetime
5
  from reportlab.lib.pagesizes import A4
6
  from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
7
- from reportlab.lib.enums import TA_CENTER, TA_RIGHT, TA_JUSTIFY, TA_LEFT
8
  from reportlab.lib import colors
9
- from reportlab.platypus import (
10
- BaseDocTemplate, PageTemplate, Frame, Paragraph, Spacer, Table, TableStyle
11
- )
12
  from reportlab.pdfbase.ttfonts import TTFont
13
  from reportlab.pdfbase import pdfmetrics
 
14
 
15
# Body font selection: start from the built-in Times face and switch to
# DejaVu Serif only when its TrueType file registers successfully.
BODY_FONT = 'Times-Roman'
try:
    pdfmetrics.registerFont(
        TTFont('DejaVuSerif', '/usr/share/fonts/truetype/dejavu/DejaVuSerif.ttf')
    )
except Exception:
    # Deliberately best-effort: any failure to register keeps the fallback.
    pass
else:
    BODY_FONT = 'DejaVuSerif'
21
-
22
-
23
def _build_doc(filepath, title_text, tiles, counts, sections, matched_sources, footer_text):
    """Render a scan report as an A4 PDF written to *filepath*.

    Strings handed to ``Paragraph`` (tile values/labels, section headings,
    paragraph text, source titles, ``footer_text``) are interpreted as
    ReportLab paragraph markup, exactly as before; callers that pass raw
    user text are responsible for escaping it. Only the matched-source URL
    is escaped here, because it is plain data that is embedded inside
    ``<link>`` markup built by this function.

    Args:
        filepath: Destination path passed to ``BaseDocTemplate``.
        title_text: Title drawn centred at the top of every page.
        tiles: Sequence of dicts with ``'value'`` and ``'label'`` keys. One
            tile is drawn per entry; the tile row is skipped when empty.
        counts: Mapping of label -> value shown as a two-row table; skipped
            when empty.
        sections: Sequence of dicts with an optional ``'heading'`` and a
            ``'paragraphs'`` list. A paragraph is either a markup string or
            a dict ``{'text': str, 'highlight': 'yellow' | 'red' | None}``.
        matched_sources: Sequence of dicts with ``'title'``, ``'url'`` and
            ``'similarity'`` keys; the table is skipped when empty.
        footer_text: Body of the "Matched Source Overview" box; the box is
            skipped when empty.

    Returns:
        None. The PDF is produced by ``doc.build``.
    """
    # Function-scope import keeps this block self-contained.
    from xml.sax.saxutils import escape

    PAGE_WIDTH, PAGE_HEIGHT = A4
    MARGIN = 36
    usable_width = PAGE_WIDTH - 2 * MARGIN

    styles = getSampleStyleSheet()
    styles.add(ParagraphStyle(name='ReportTitle', fontName=BODY_FONT, fontSize=18, alignment=TA_CENTER, leading=22))
    styles.add(ParagraphStyle(name='SmallRight', fontName=BODY_FONT, fontSize=9, alignment=TA_RIGHT, textColor=colors.HexColor("#555555")))
    styles.add(ParagraphStyle(name='TileBig', fontName=BODY_FONT, fontSize=30, alignment=TA_CENTER, leading=32))
    styles.add(ParagraphStyle(name='TileLabel', fontName=BODY_FONT, fontSize=10, alignment=TA_CENTER, textColor=colors.HexColor("#666666")))
    styles.add(ParagraphStyle(name='SectionHeading', fontName=BODY_FONT, fontSize=13, spaceBefore=8, spaceAfter=4, leading=15))
    styles.add(ParagraphStyle(name='Body', fontName=BODY_FONT, fontSize=11, leading=15, alignment=TA_JUSTIFY))
    styles.add(ParagraphStyle(name='HighlightYellow', fontName=BODY_FONT, fontSize=11, leading=15, backColor=colors.HexColor("#fff3b0"), alignment=TA_JUSTIFY))
    styles.add(ParagraphStyle(name='HighlightRed', fontName=BODY_FONT, fontSize=11, leading=15, backColor=colors.HexColor("#ffd6d6"), alignment=TA_JUSTIFY))
    styles.add(ParagraphStyle(name='Footer', fontName=BODY_FONT, fontSize=9, alignment=TA_RIGHT, textColor=colors.HexColor("#666666")))
    styles.add(ParagraphStyle(name='MatchedHeader', fontName=BODY_FONT, fontSize=12, leading=14, alignment=TA_LEFT, spaceBefore=6, spaceAfter=6))

    # Take the timestamp once so every page header shows the same value,
    # even if rendering happens to cross a minute boundary.
    date_str = datetime.now().strftime("%d %B %Y, %H:%M")

    def header_footer(canvas, doc):
        """Draw the date, the title and the page number on each page."""
        canvas.saveState()
        canvas.setFont(BODY_FONT, 9)
        canvas.setFillColor(colors.HexColor("#555555"))
        canvas.drawString(MARGIN, PAGE_HEIGHT - MARGIN + 8, f"Date: {date_str}")
        canvas.setFont(BODY_FONT, 16)
        canvas.setFillColor(colors.black)
        canvas.drawCentredString(PAGE_WIDTH / 2.0, PAGE_HEIGHT - MARGIN + 4, title_text)
        canvas.setFont(BODY_FONT, 9)
        canvas.setFillColor(colors.HexColor("#666666"))
        canvas.drawRightString(PAGE_WIDTH - MARGIN, MARGIN - 10, f"Page {doc.page}")
        canvas.restoreState()

    doc = BaseDocTemplate(filepath, pagesize=A4,
                          leftMargin=MARGIN, rightMargin=MARGIN,
                          topMargin=MARGIN, bottomMargin=MARGIN)
    frame = Frame(MARGIN, MARGIN, usable_width, PAGE_HEIGHT - 2 * MARGIN, id='normal')
    template = PageTemplate(id='report', frames=[frame], onPage=header_footer)
    doc.addPageTemplates([template])

    story = []

    # Summary tiles: one column per entry instead of assuming exactly four,
    # so a shorter list no longer raises IndexError and a longer one is not
    # silently truncated.
    tile_values = list(tiles or [])
    if tile_values:
        tile_count = len(tile_values)
        tiles_data = [
            [Paragraph(f"<b>{tile['value']}</b>", styles['TileBig']) for tile in tile_values],
            [Paragraph(tile['label'], styles['TileLabel']) for tile in tile_values],
        ]
        tiles_table = Table(tiles_data, colWidths=[usable_width / tile_count] * tile_count, rowHeights=[46, 18])
        tiles_table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor("#f7f7f9")),
            ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
            ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
            ('BOX', (0, 0), (-1, -1), 0.6, colors.HexColor("#dddddd")),
        ]))
        story.append(tiles_table)
        story.append(Spacer(1, 12))

    # Counts row: labels on the first row, values on the second.
    if counts:
        counts_table = Table([list(counts.keys()), list(counts.values())],
                             colWidths=[usable_width / len(counts)] * len(counts))
        counts_table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor("#f4f6f7")),
            ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
            ('BOX', (0, 0), (-1, -1), 0.5, colors.HexColor("#e6e6e6")),
        ]))
        story.append(counts_table)
        story.append(Spacer(1, 12))

    # Sections: each paragraph is drawn in the style selected by its
    # optional 'highlight' key; anything else uses the plain body style.
    highlight_styles = {
        'yellow': styles['HighlightYellow'],
        'red': styles['HighlightRed'],
    }
    for sec in sections or []:
        heading = sec.get('heading')
        if heading:
            story.append(Paragraph(heading, styles['SectionHeading']))
        # "or []" also covers an explicit 'paragraphs': None.
        for para in sec.get('paragraphs') or []:
            if isinstance(para, dict):
                text = para.get('text', '')
                style = highlight_styles.get(para.get('highlight'), styles['Body'])
            else:
                text = para
                style = styles['Body']
            story.append(Paragraph(text, style))
            story.append(Spacer(1, 6))

    story.append(Spacer(1, 10))

    # Matched Sources table (if any)
    if matched_sources:
        story.append(Paragraph("Matched Sources", styles['MatchedHeader']))
        ms_table_data = [["#", "Source Title", "URL", "Similarity"]]
        for i, ms in enumerate(matched_sources, start=1):
            title_par = Paragraph(ms.get('title') or '', styles['Body'])
            url = str(ms.get('url') or '')
            if url:
                # Escape markup-significant characters so a URL containing
                # '&', '<' or '"' survives the paragraph markup parser.
                href = escape(url, {'"': '&quot;'})
                url_markup = f'<link href="{href}">{escape(url)}</link>'
            else:
                # No URL: leave the cell empty rather than emit an empty link.
                url_markup = ''
            url_par = Paragraph(url_markup, styles['Body'])
            ms_table_data.append([str(i), title_par, url_par, ms.get('similarity', '')])
        ms_table = Table(ms_table_data, colWidths=[30, usable_width * 0.35, usable_width * 0.45, usable_width * 0.15])
        ms_table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor("#f2f4f5")),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.HexColor("#333333")),
            ('ALIGN', (0, 0), (-1, 0), 'CENTER'),
            ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
            ('BOX', (0, 0), (-1, -1), 0.6, colors.HexColor("#e0e0e0")),
            ('INNERGRID', (0, 0), (-1, -1), 0.4, colors.HexColor("#efefef")),
            ('LEFTPADDING', (1, 1), (1, -1), 6),
            ('LEFTPADDING', (2, 1), (2, -1), 6),
        ]))
        story.append(ms_table)
        story.append(Spacer(1, 14))

    # Matched Source Overview block (footer_text)
    if footer_text:
        matched_table = Table(
            [[Paragraph("<b>Matched Source Overview</b>", styles['Body'])],
             [Paragraph(footer_text, styles['Body'])]],
            colWidths=[usable_width]
        )
        matched_table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor("#f7fafb")),
            ('BOX', (0, 0), (-1, -1), 0.5, colors.HexColor("#e6e6e6")),
            ('LEFTPADDING', (0, 0), (-1, -1), 8),
            ('RIGHTPADDING', (0, 0), (-1, -1), 8),
            ('TOPPADDING', (0, 0), (-1, -1), 6),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 6),
        ]))
        story.append(matched_table)
        story.append(Spacer(1, 24))

    story.append(Paragraph("Generated by TrueWrite Scan • https://gopalkrushnamahapatra-truewrite-scan.static.hf.space", styles['Footer']))

    doc.build(story)
155
-
156
-
157
def generate_report(report_type: str, out_dir: str = "/tmp", **kwargs) -> str:
    """Build a PDF report in *out_dir* and return the path of the new file.

    Args:
        report_type: "ai" | "grammar" | "plagiarism". Used only as the
            prefix of the generated file name.
        out_dir: Directory for the PDF; created if it does not exist.
        **kwargs: Optional report content. A missing or falsy value falls
            back to the default shown:
            - title_text: str, default "Report"
            - tiles: list of dicts [{'value': '12%', 'label': 'Plagiarism'}, ...],
              default is four plagiarism tiles (0% / 0% / 0% / 100%)
            - counts: dict {'Words': 950, ...}, default {}
            - sections: list [{'heading': '', 'paragraphs': [...]}], default []
            - matched_sources: list [{'title', 'url', 'similarity'}], default []
            - footer_text: str, default ''

    Returns:
        Path to the generated PDF.
    """
    os.makedirs(out_dir, exist_ok=True)
    # A short random suffix keeps reports of the same type from overwriting
    # each other.
    filename = f"{report_type}_report_{uuid.uuid4().hex[:8]}.pdf"
    filepath = os.path.join(out_dir, filename)

    # "or" rather than a .get() default: an explicit title_text=None must
    # fall back too, the same way every other option below does.
    title_text = kwargs.get('title_text') or "Report"
    tiles = kwargs.get('tiles') or [
        {'value': '0%', 'label': 'Plagiarism'},
        {'value': '0%', 'label': 'Exact Match'},
        {'value': '0%', 'label': 'Partial Match'},
        {'value': '100%', 'label': 'Unique'},
    ]
    counts = kwargs.get('counts') or {}
    sections = kwargs.get('sections') or []
    matched_sources = kwargs.get('matched_sources') or []
    footer_text = kwargs.get('footer_text') or ''

    _build_doc(filepath, title_text, tiles, counts, sections, matched_sources, footer_text)
    return filepath
 
 
 
1
  import os
 
 
2
  from reportlab.lib.pagesizes import A4
3
  from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
4
+ from reportlab.lib.enums import TA_CENTER, TA_RIGHT, TA_LEFT, TA_JUSTIFY
5
  from reportlab.lib import colors
6
+ from reportlab.platypus import BaseDocTemplate, PageTemplate, Frame, Paragraph, Spacer, Table, TableStyle, KeepTogether
 
 
7
  from reportlab.pdfbase.ttfonts import TTFont
8
  from reportlab.pdfbase import pdfmetrics
9
+ from datetime import datetime
10
 
11
# Standalone script: builds one fixed sample plagiarism report and writes it
# to /mnt/data. Everything below runs top to bottom at module level.
# NOTE(review): because of that, importing this module also creates the
# directory and writes the PDF. The previous revision of this file exposed
# generate_report(); nothing here defines it — confirm no caller imports it.

# Ensure output directory exists
out_dir = "/mnt/data"
os.makedirs(out_dir, exist_ok=True)

# Body font: use DejaVu Serif from the first known location that exists
# (Linux path first, then macOS); otherwise stay on the built-in Times face.
body_font = 'Times-Roman'
try:
    for font_path in (
        '/usr/share/fonts/truetype/dejavu/DejaVuSerif.ttf',
        '/Library/Fonts/DejaVuSerif.ttf',
    ):
        if os.path.exists(font_path):
            pdfmetrics.registerFont(TTFont('DejaVuSerif', font_path))
            body_font = 'DejaVuSerif'
            break
except Exception:
    # Best-effort: a font file that fails to register falls back to Times.
    body_font = 'Times-Roman'

pdf_path = os.path.join(out_dir, "TrueWriteScan_Duplichecker_PixelPerfect_Report.pdf")
PAGE_WIDTH, PAGE_HEIGHT = A4
MARGIN = 36
usable_width = PAGE_WIDTH - 2 * MARGIN

styles = getSampleStyleSheet()
styles.add(ParagraphStyle(name='ReportTitle', fontName=body_font, fontSize=18, alignment=TA_CENTER, leading=22))
styles.add(ParagraphStyle(name='SmallRight', fontName=body_font, fontSize=9, alignment=TA_RIGHT, textColor=colors.HexColor("#555555")))
styles.add(ParagraphStyle(name='TileBig', fontName=body_font, fontSize=30, alignment=TA_CENTER, leading=32))
styles.add(ParagraphStyle(name='TileLabel', fontName=body_font, fontSize=10, alignment=TA_CENTER, textColor=colors.HexColor("#666666")))
styles.add(ParagraphStyle(name='SectionHeading', fontName=body_font, fontSize=13, spaceBefore=8, spaceAfter=4, leading=15))
styles.add(ParagraphStyle(name='Body', fontName=body_font, fontSize=11, leading=15, alignment=TA_JUSTIFY))
styles.add(ParagraphStyle(name='HighlightYellow', fontName=body_font, fontSize=11, leading=15, backColor=colors.HexColor("#fff3b0"), alignment=TA_JUSTIFY))
styles.add(ParagraphStyle(name='HighlightRed', fontName=body_font, fontSize=11, leading=15, backColor=colors.HexColor("#ffd6d6"), alignment=TA_JUSTIFY))
styles.add(ParagraphStyle(name='Footer', fontName=body_font, fontSize=9, alignment=TA_RIGHT, textColor=colors.HexColor("#666666")))
styles.add(ParagraphStyle(name='MatchedHeader', fontName=body_font, fontSize=12, leading=14, alignment=TA_LEFT, spaceBefore=6, spaceAfter=6))

# Take the timestamp once so every page header shows the same value.
_generated_at = datetime.now().strftime("%d %B %Y, %H:%M")


def header_footer(canvas, doc):
    """Draw the date, the report title and the page number on each page."""
    canvas.saveState()
    canvas.setFont(body_font, 9)
    canvas.setFillColor(colors.HexColor("#555555"))
    canvas.drawString(MARGIN, PAGE_HEIGHT - MARGIN + 8, f"Date: {_generated_at}")
    canvas.setFont(body_font, 16)
    canvas.setFillColor(colors.black)
    canvas.drawCentredString(PAGE_WIDTH / 2.0, PAGE_HEIGHT - MARGIN + 4, "Plagiarism Scan Report")
    canvas.setFont(body_font, 9)
    canvas.setFillColor(colors.HexColor("#666666"))
    canvas.drawRightString(PAGE_WIDTH - MARGIN, MARGIN - 10, f"Page {doc.page}")
    canvas.restoreState()


doc = BaseDocTemplate(pdf_path, pagesize=A4,
                      leftMargin=MARGIN, rightMargin=MARGIN,
                      topMargin=MARGIN, bottomMargin=MARGIN)

frame = Frame(MARGIN, MARGIN, usable_width, PAGE_HEIGHT - 2 * MARGIN, id='normal')
template = PageTemplate(id='report', frames=[frame], onPage=header_footer)
doc.addPageTemplates([template])

# Optionally set metadata
doc.title = "TrueWrite Scan — Plagiarism Report"
doc.author = "TrueWrite Scan"

story = []

# Tiles (colors matched to Duplichecker-like look): big value on the first
# row, its label underneath.
_tile_specs = [
    ("12%", "Plagiarism"),
    ("4%", "Exact Match"),
    ("8%", "Partial Match"),
    ("88%", "Unique"),
]
tiles_data = [
    [Paragraph(f"<b>{value}</b>", styles['TileBig']) for value, _ in _tile_specs],
    [Paragraph(label, styles['TileLabel']) for _, label in _tile_specs],
]
tiles_table = Table(tiles_data, colWidths=[usable_width / 4.0] * 4, rowHeights=[46, 18])
tiles_table.setStyle(TableStyle([
    ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor("#f7f7f9")),
    ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
    ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
    ('BOX', (0, 0), (-1, -1), 0.6, colors.HexColor("#dddddd")),
]))
story.append(tiles_table)
story.append(Spacer(1, 12))

# Counts row
counts = [
    ['Words', 'Characters', 'Sentences', 'Paragraphs', 'Read Time'],
    ['950', '7138', '43', '16', '5 minute(s)'],
]
counts_table = Table(counts, colWidths=[usable_width / 5.0] * 5)
counts_table.setStyle(TableStyle([
    ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor("#f4f6f7")),
    ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
    ('BOX', (0, 0), (-1, -1), 0.5, colors.HexColor("#e6e6e6")),
]))
story.append(counts_table)
story.append(Spacer(1, 12))

# Sample content; highlighting is applied to whole paragraphs through the
# paragraph style's background colour.
story.append(Paragraph("Abstract—", styles['SectionHeading']))

para1 = ("Using AI, machine learning, and advanced computing together opens up a lot of possibilities to effectively tackle "
         "major issues such as climate resilience and health equity.")
para2 = ("The research looks at the ways in which AI can be a social developmental tool by aiding first responders during calamities "
         "as well as healthcare personalization.")
para3 = ("In particular, it mentions the use of deep learning and NLP for better prediction, efficient resource management, and improved accessibility of services.")
para4 = ("Moreover, the paper points to the issues that revolve around the ethics of the technology and the need for transparent models.")

story.append(Paragraph(para1, styles['Body']))
story.append(Spacer(1, 6))
story.append(Paragraph(para2, styles['HighlightYellow']))
story.append(Spacer(1, 6))
story.append(Paragraph(para3, styles['HighlightRed']))
story.append(Spacer(1, 6))
story.append(Paragraph(para4, styles['Body']))
story.append(Spacer(1, 10))

story.append(Paragraph("I. Introduction", styles['SectionHeading']))
for _ in range(3):
    story.append(Paragraph(para1 + " " + para2, styles['Body']))
    story.append(Spacer(1, 6))

story.append(Paragraph("II. Literature Review", styles['SectionHeading']))
for i in range(4):
    # Every other repetition shows the second paragraph highlighted.
    story.append(Paragraph(para2, styles['Body']))
    story.append(Paragraph(para3, styles['HighlightYellow'] if i % 2 == 0 else styles['Body']))
    story.append(Spacer(1, 6))

story.append(Paragraph("III. Methodology", styles['SectionHeading']))
for _ in range(4):
    story.append(Paragraph(para3, styles['Body']))
    story.append(Spacer(1, 6))

story.append(Spacer(1, 12))

# Matched sources header
story.append(Paragraph("Matched Sources", styles['MatchedHeader']))

matched_sources = [
    {"title": "AI for Social Good", "url": "https://example.com/ai-social-good", "similarity": "42%"},
    {"title": "Deep Learning Predictions", "url": "https://example.org/deep-learning", "similarity": "67%"},
    {"title": "NLP and Resource Management", "url": "https://sample.org/nlp-resource-management", "similarity": "18%"},
]

ms_table_data = [["#", "Source Title", "URL", "Similarity"]]
for i, ms in enumerate(matched_sources, start=1):
    title_par = Paragraph(ms['title'], styles['Body'])
    # Use <a href="..."> for clickable link
    url_par = Paragraph(f'<a href="{ms["url"]}">{ms["url"]}</a>', styles['Body'])
    ms_table_data.append([str(i), title_par, url_par, ms['similarity']])

ms_table = Table(ms_table_data, colWidths=[30, usable_width * 0.35, usable_width * 0.45, usable_width * 0.15])
ms_table.setStyle(TableStyle([
    ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor("#f2f4f5")),
    ('TEXTCOLOR', (0, 0), (-1, 0), colors.HexColor("#333333")),
    ('ALIGN', (0, 0), (-1, 0), 'CENTER'),
    ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
    ('BOX', (0, 0), (-1, -1), 0.6, colors.HexColor("#e0e0e0")),
    ('INNERGRID', (0, 0), (-1, -1), 0.4, colors.HexColor("#efefef")),
    ('LEFTPADDING', (1, 1), (1, -1), 6),
    ('LEFTPADDING', (2, 1), (2, -1), 6),
    # Intended to help long URLs wrap.
    # NOTE(review): WORDWRAP does not appear among the TableStyle commands in
    # the ReportLab user guide; wrapping of Paragraph cells is presumably
    # controlled by ParagraphStyle.wordWrap instead — confirm this line has
    # any effect before relying on it.
    ('WORDWRAP', (2, 1), (2, -1), 'CJK'),
]))
story.append(ms_table)
story.append(Spacer(1, 14))

# Matched Source final block
matched_table = Table(
    [[Paragraph("<b>Matched Source Overview</b>", styles['Body'])],
     [Paragraph("Congratulations! Sections highlighted indicate similarity with external sources. Please review matched entries for exact references.", styles['Body'])]],
    colWidths=[usable_width]
)
matched_table.setStyle(TableStyle([
    ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor("#f7fafb")),
    ('BOX', (0, 0), (-1, -1), 0.5, colors.HexColor("#e6e6e6")),
    ('LEFTPADDING', (0, 0), (-1, -1), 8),
    ('RIGHTPADDING', (0, 0), (-1, -1), 8),
    ('TOPPADDING', (0, 0), (-1, -1), 6),
    ('BOTTOMPADDING', (0, 0), (-1, -1), 6),
]))
story.append(matched_table)
story.append(Spacer(1, 24))

story.append(Paragraph("Generated by TrueWrite Scan • https://gopalkrushnamahapatra-truewrite-scan.static.hf.space", styles['Footer']))

# Build PDF
doc.build(story)

print("PDF written to:", pdf_path)