subhan971 committed on
Commit
aba55b6
verified
1 Parent(s): a662849

Upload pdf.py

Browse files
Files changed (1) hide show
  1. pdf.py +194 -0
pdf.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fpdf import FPDF
2
+ import re
3
+
4
+
5
class PDF(FPDF):
    """FPDF subclass that stamps an attribution line in the footer of each page."""

    def footer(self):
        """Write the centred attribution text in small grey italics."""
        attribution = 'Generated by Zouq-ul-ilm'
        grey = (128, 128, 128)

        # A negative offset positions the cursor relative to the page bottom
        # (fpdf convention for set_y).
        self.set_y(-15)
        self.set_font('Arial', 'I', 8)
        self.set_text_color(*grey)
        # Width 0 lets the cell span to the right margin so 'C' centres the text.
        self.cell(0, 10, attribution, 0, 0, 'C')
12
+
13
+
14
# Matches "#", "##" or "###" headings on a single, already-stripped line.
_HEADING_RE = re.compile(r'^(#{1,3})\s+(.+)')
# Matches "- item" / "* item" bullets on a single, already-stripped line.
_BULLET_RE = re.compile(r'^[-*]\s+(.+)')
# Heading level -> (font size, line height, space before, space after).
_HEADING_LAYOUT = {
    1: (18, 10, 4, 3),
    2: (15, 8, 3, 2),
    3: (13, 7, 2, 2),
}


def pdf1(text, output_path="notes.pdf"):
    """Render lightly marked-up text to a PDF file.

    Each input line is stripped and then classified as one of:

    * an empty line      -> a small vertical gap,
    * a heading          -> ``#``, ``##`` or ``###`` followed by whitespace,
    * a bullet           -> ``-`` or ``*`` followed by whitespace,
    * anything else      -> a normal paragraph with inline formatting.

    Args:
        text: The source text, with lines separated by ``\\n``.
        output_path: Where the PDF is written. Defaults to ``"notes.pdf"``,
            which is the file name this function has always used.

    Heading text is reduced to Latin-1 before drawing; characters outside
    Latin-1 are dropped.
    """
    pdf = PDF('P', 'mm', 'A4')
    pdf.add_page()
    pdf.set_auto_page_break(auto=True, margin=15)

    for raw_line in text.split('\n'):
        line = raw_line.strip()

        if not line:
            pdf.ln(4)  # blank source line -> small vertical gap
            continue

        heading = _HEADING_RE.match(line)
        if heading:
            level = len(heading.group(1))
            font_size, line_height, gap_before, gap_after = _HEADING_LAYOUT[level]
            title = heading.group(2)

            pdf.ln(gap_before)
            pdf.set_font('Arial', 'B', font_size)
            pdf.set_text_color(0, 0, 0)
            pdf.multi_cell(
                0,
                line_height,
                title.encode('latin-1', 'ignore').decode('latin-1'),
            )
            pdf.ln(gap_after)
            continue

        bullet = _BULLET_RE.match(line)
        if bullet:
            process_bullet_point(pdf, bullet.group(1))
            continue

        # Normal paragraph with inline formatting.
        pdf.set_font('Arial', '', 11)
        pdf.set_text_color(0, 0, 0)
        process_inline_formatting(pdf, line)
        pdf.ln(5)

    pdf.output(output_path)
73
+
74
+
75
def process_bullet_point(pdf, text):
    """Draw one bullet item: a bullet glyph, then the wrapped item text.

    Args:
        pdf: The document being written to.
        text: The item text with the leading ``-``/``*`` marker already removed.
    """
    glyph_offset = 5   # distance of the bullet glyph from the left margin
    body_offset = 12   # distance of the item text from the left margin
    margin = pdf.l_margin

    # Place and draw the bullet glyph.
    pdf.set_x(margin + glyph_offset)
    pdf.set_font('Arial', '', 11)
    pdf.cell(5, 5, chr(149), 0, 0)  # chr(149) is used as the bullet character

    # Item text starts at a fixed indent and wraps back to that same indent.
    process_inline_formatting_wrapped(pdf, text, margin + body_offset)
    pdf.ln(5)
89
+
90
+
91
# Splits text on **bold**, __bold__, _underline_ and ~~underline~~ spans.
# The capture group makes re.split keep the marked-up spans in its result.
_INLINE_SPAN_RE = re.compile(r'(\*\*.*?\*\*|__.*?__|_.*?_|~~.*?~~)')
_BOLD_SPAN_RE = re.compile(r'\*\*.*?\*\*|__.*?__')
_UNDERLINE_SPAN_RE = re.compile(r'_.*?_|~~.*?~~')


def process_inline_formatting_wrapped(pdf, text, left_indent):
    """Draw ``text`` word by word with inline styling and manual wrapping.

    ``**x**`` and ``__x__`` are drawn bold; ``_x_`` and ``~~x~~`` are drawn
    underlined. Words that would cross the right margin are moved to a new
    line that starts at ``left_indent``.

    Args:
        pdf: The document being written to.
        text: A single line of text, possibly containing inline markers.
        left_indent: X position where the first line and every wrapped line start.

    Spacing follows the source text exactly: a space is only emitted where the
    text contains one, so no extra blank is inserted where a styled span meets
    its neighbours. Characters outside Latin-1 are dropped.
    """
    line_height = 5
    font_size = 11

    pdf.set_x(left_indent)
    right_edge = pdf.w - pdf.r_margin

    for segment in _INLINE_SPAN_RE.split(text):
        if not segment:
            continue

        if _BOLD_SPAN_RE.match(segment):
            style = 'B'
            content = re.sub(r'\*\*|__', '', segment)
        elif _UNDERLINE_SPAN_RE.match(segment):
            style = 'U'
            content = re.sub(r'_+|~+', '', segment)
        else:
            style = ''
            content = segment

        # With errors='ignore' this cannot raise; unsupported characters are
        # simply removed.
        content = content.encode('latin-1', 'ignore').decode('latin-1')

        # The style is constant for the whole segment, so set it once.
        pdf.set_font('Arial', style, font_size)

        for index, word in enumerate(content.split(' ')):
            # split(' ') removed the separators; restore one before every word
            # except the first. A segment that begins or ends with a space
            # yields an empty word there, which carries that boundary space.
            piece = word if index == 0 else ' ' + word
            if not piece:
                continue

            width = pdf.get_string_width(piece)
            if pdf.get_x() + width > right_edge:
                pdf.ln(line_height)
                pdf.set_x(left_indent)
                # A wrapped line must not start with the separator space.
                piece = word
                if not piece:
                    # Only the space overflowed. Drawing an empty cell would
                    # use width 0, which fpdf treats as "extend to the right
                    # margin" and would push the next word down another line.
                    continue
                width = pdf.get_string_width(piece)

            pdf.cell(width, line_height, piece, 0, 0)
150
+
151
+
152
def process_inline_formatting(pdf, text):
    """Draw a normal paragraph line, wrapping back to the page's left margin.

    Args:
        pdf: The document being written to.
        text: A single line of text, possibly containing inline markers.
    """
    # Paragraphs have no extra indent: first and wrapped lines both start at
    # the left margin.
    process_inline_formatting_wrapped(pdf, text, pdf.l_margin)
156
+
157
+
158
# Demo: running this file directly renders the sample notes below to a PDF.
if __name__ == "__main__":
    # Sample input covering a heading, paragraphs, sub-headings, bullets with
    # bold spans, and numbered lines (numbered lines are drawn as paragraphs).
    demo_notes = """# Hashing: Exam Prep Notes

Hashing is a fundamental technique in computer science used for **efficient data storage and retrieval**. It allows us to quickly find an element in a collection (like an array or a list) without having to search through the whole thing.

## 1. Core Concepts:

* **Hash Table:** A data structure that implements an associative array, a structure that can map keys to values (like a dictionary).
* **Key:** The input value we want to store or retrieve. The key is unique and identifies the data.
* **Value:** The actual data associated with a key. The value is what we want to store in the hash table.
* **Hash Function (h(x)):** A function that takes a key as input and returns an index (usually an integer number) where the corresponding value should be stored in the hash table. This index is also called the **hash value** or **hash code**.
* **Hash Table Size (M):** The total number of slots or buckets available in the hash table that are allocated for storage.
* **Collision:** When two different keys produce the same hash value and map to the same slot (or bucket or slot). This is inevitable and needs to be handled.
* **Load Factor (位):** A measure of how full the hash table is, calculated as 位 = n/M where n is the number of elements and M is the hash table size. A high load factor increases the likelihood of collisions.

## 2. How Hashing Works:

1. **Key is provided.**
2. **Hash Function is applied to the key:** The hash function processes the key and generates an index value.
3. **The value is stored (or retrieved) at the calculated index** in the hash table. If there's a collision (the index is already occupied), a collision resolution technique is used.

## 3. Hash Functions - Key Considerations:

* **Deterministic:** The same key should always produce the same hash value. This ensures consistency.
* **Uniformity:** Ideally, the hash function should distribute keys uniformly across the hash table to minimize collisions.
* **Efficiency:** The hash function should be fast to compute, as it's called frequently during insertions, deletions, and lookups.

## 4. Common Hash Functions:

* **Division Method:** h(k) = k mod M. Simple but can lead to clustering if M is not chosen carefully.
* **Multiplication Method:** h(k) = floor(M * (kA mod 1)) where A is a constant (often 0.618034). More complex but generally better distribution.
* **Mid-Square Method:** Square the key, extract the middle digits, and use them as the hash value.

This is a comprehensive overview of hashing concepts for your exam preparation."""

    pdf1(demo_notes)