Spaces:

subhan971
/

Zouq_ul_ilm_notes_generator

Sleeping

App Files Community

subhan971 committed on Oct 19, 2025

Commit

aba55b6

verified ·

1 Parent(s): a662849

Upload pdf.py

Browse files

Files changed (1) hide show

pdf.py +194 -0

pdf.py ADDED Viewed

	@@ -0,0 +1,194 @@

+from fpdf import FPDF
+import re
+class PDF(FPDF):
+    def footer(self):
+        """Footer with custom text on every page"""
+        self.set_y(-15)
+        self.set_font('Arial', 'I', 8)
+        self.set_text_color(128, 128, 128)
+        self.cell(0, 10, 'Generated by Zouq-ul-ilm', 0, 0, 'C')
+def pdf1(text):
+    pdf = PDF('P', 'mm', 'A4')
+    pdf.add_page()
+    pdf.set_auto_page_break(auto=True, margin=15)
+    # Improved patterns
+    h1_pattern = re.compile(r'^#\s+(.+)', re.MULTILINE)
+    h2_pattern = re.compile(r'^##\s+(.+)', re.MULTILINE)
+    h3_pattern = re.compile(r'^###\s+(.+)', re.MULTILINE)
+    bullet_pattern = re.compile(r'^\s*[-*]\s+(.+)', re.MULTILINE)
+    # Split text into lines for better processing
+    lines = text.split('\n')
+    for line in lines:
+        line = line.strip()
+        if not line:
+            pdf.ln(4)  # Add spacing for empty lines
+            continue
+        # Check for headers first
+        if h1_pattern.match(line):
+            pdf.ln(4)
+            pdf.set_font('Arial', 'B', 18)
+            pdf.set_text_color(0, 0, 0)
+            clean_text = re.sub(r'^#\s+', '', line)
+            pdf.multi_cell(0, 10, clean_text.encode('latin-1', 'ignore').decode('latin-1'))
+            pdf.ln(3)
+        elif h2_pattern.match(line):
+            pdf.ln(3)
+            pdf.set_font('Arial', 'B', 15)
+            pdf.set_text_color(0, 0, 0)
+            clean_text = re.sub(r'^##\s+', '', line)
+            pdf.multi_cell(0, 8, clean_text.encode('latin-1', 'ignore').decode('latin-1'))
+            pdf.ln(2)
+        elif h3_pattern.match(line):
+            pdf.ln(2)
+            pdf.set_font('Arial', 'B', 13)
+            pdf.set_text_color(0, 0, 0)
+            clean_text = re.sub(r'^###\s+', '', line)
+            pdf.multi_cell(0, 7, clean_text.encode('latin-1', 'ignore').decode('latin-1'))
+            pdf.ln(2)
+        elif bullet_pattern.match(line):
+            # Handle bullet points
+            clean_text = re.sub(r'^\s*[-*]\s+', '', line)
+            process_bullet_point(pdf, clean_text)
+        else:
+            # Normal paragraph with inline formatting
+            pdf.set_font('Arial', '', 11)
+            pdf.set_text_color(0, 0, 0)
+            process_inline_formatting(pdf, line)
+            pdf.ln(5)
+    pdf.output(f"notes.pdf")
+def process_bullet_point(pdf, text):
+    """Process bullet points with proper wrapping"""
+    left_margin = pdf.l_margin
+    bullet_indent = 5
+    text_indent = 12
+    # Set position for bullet
+    pdf.set_x(left_margin + bullet_indent)
+    pdf.set_font('Arial', '', 11)
+    pdf.cell(5, 5, chr(149), 0, 0)  # Bullet character
+    # Process the text with wrapping
+    process_inline_formatting_wrapped(pdf, text, left_margin + text_indent)
+    pdf.ln(5)
+def process_inline_formatting_wrapped(pdf, text, left_indent):
+    """Process inline formatting with proper text wrapping"""
+    # Split by formatting markers while keeping them
+    segments = re.split(r'(\*\*.*?\*\*|__.*?__|_.*?_|~~.*?~~)', text)
+    # Set initial position
+    pdf.set_x(left_indent)
+    # Calculate available width from current position to right margin
+    right_margin = pdf.w - pdf.r_margin
+    for segment in segments:
+        if not segment:
+            continue
+        # Determine formatting
+        if re.match(r'\*\*.*?\*\*|__.*?__', segment):
+            style = 'B'
+            clean_text = re.sub(r'\*\*|__', '', segment)
+        elif re.match(r'_.*?_|~~.*?~~', segment):
+            style = 'U'
+            clean_text = re.sub(r'_+|~+', '', segment)
+        else:
+            style = ''
+            clean_text = segment
+        # Handle encoding
+        try:
+            safe_text = clean_text.encode('latin-1', 'ignore').decode('latin-1')
+        except:
+            safe_text = clean_text.encode('ascii', 'ignore').decode('ascii')
+        # Split text into words for wrapping
+        words = safe_text.split(' ')
+        for i, word in enumerate(words):
+            # Add space before word (except first word of segment)
+            if i > 0 or segment != segments[0]:
+                test_word = ' ' + word
+            else:
+                test_word = word
+            pdf.set_font('Arial', style, 11)
+            word_width = pdf.get_string_width(test_word)
+            # Get current X position
+            current_x = pdf.get_x()
+            # Check if word fits on current line (with proper margin check)
+            if current_x + word_width > right_margin:
+                # Move to next line
+                pdf.ln(5)
+                pdf.set_x(left_indent)
+                # Remove leading space after line break
+                test_word = word
+                word_width = pdf.get_string_width(test_word)
+            # Write the word
+            pdf.cell(word_width, 5, test_word, 0, 0)
+def process_inline_formatting(pdf, text):
+    """Process inline formatting for normal paragraphs"""
+    left_margin = pdf.l_margin
+    process_inline_formatting_wrapped(pdf, text, left_margin)
+# Example usage: when this file is run as a script, the sample study notes
+# below (headings, bullets, numbered lines and inline **bold** spans) are
+# passed to pdf1(), which writes the resulting PDF.
+# NOTE: the Greek letter in the "Load Factor" bullet is not representable in
+# the encoding the renderer filters text through, so its encode(..., 'ignore')
+# step omits that character from the PDF.
+if __name__ == "__main__":
+    sample_text = """# Hashing: Exam Prep Notes
+Hashing is a fundamental technique in computer science used for **efficient data storage and retrieval**. It allows us to quickly find an element in a collection (like an array or a list) without having to search through the whole thing.
+## 1. Core Concepts:
+* **Hash Table:** A data structure that implements an associative array, a structure that can map keys to values (like a dictionary).
+* **Key:** The input value we want to store or retrieve. The key is unique and identifies the data.
+* **Value:** The actual data associated with a key. The value is what we want to store in the hash table.
+* **Hash Function (h(x)):** A function that takes a key as input and returns an index (usually an integer number) where the corresponding value should be stored in the hash table. This index is also called the **hash value** or **hash code**.
+* **Hash Table Size (M):** The total number of slots or buckets available in the hash table that are allocated for storage.
+* **Collision:** When two different keys produce the same hash value and map to the same slot (or bucket or slot). This is inevitable and needs to be handled.
+* **Load Factor (λ):** A measure of how full the hash table is, calculated as λ = n/M where n is the number of elements and M is the hash table size. A high load factor increases the likelihood of collisions.
+## 2. How Hashing Works:
+1. **Key is provided.**
+2. **Hash Function is applied to the key:** The hash function processes the key and generates an index value.
+3. **The value is stored (or retrieved) at the calculated index** in the hash table. If there's a collision (the index is already occupied), a collision resolution technique is used.
+## 3. Hash Functions - Key Considerations:
+* **Deterministic:** The same key should always produce the same hash value. This ensures consistency.
+* **Uniformity:** Ideally, the hash function should distribute keys uniformly across the hash table to minimize collisions.
+* **Efficiency:** The hash function should be fast to compute, as it's called frequently during insertions, deletions, and lookups.
+## 4. Common Hash Functions:
+* **Division Method:** h(k) = k mod M. Simple but can lead to clustering if M is not chosen carefully.
+* **Multiplication Method:** h(k) = floor(M * (kA mod 1)) where A is a constant (often 0.618034). More complex but generally better distribution.
+* **Mid-Square Method:** Square the key, extract the middle digits, and use them as the hash value.
+This is a comprehensive overview of hashing concepts for your exam preparation."""
+    pdf1(sample_text)