"""
Whiteboard Notes -> Meeting Summary

A Gradio Space that converts whiteboard/handwritten meeting notes into
structured summaries with action items, owners, and due dates.

Designed for the Hugging Face Spaces free CPU tier.
"""
|
|
| import os |
| import re |
| import base64 |
| import time |
| import hashlib |
| import logging |
| import glob |
| from datetime import datetime |
| from typing import Tuple, Dict, List, Optional |
|
|
| import gradio as gr |
|
|
| |
# Module-wide logging: timestamped INFO-level messages.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


# Optional Hugging Face API token; anonymous access is used when unset.
HF_TOKEN = os.getenv("HF_TOKEN", None)

# Vision-language model used for both OCR and summary generation.
VISION_MODEL = "Qwen/Qwen2.5-VL-7B-Instruct"

# Simple per-session throttle: minimum seconds between requests, keyed by
# session id -> timestamp of the last accepted request.
MIN_REQUEST_INTERVAL = 2
session_timestamps: Dict[str, float] = {}

# NOTE(review): results_cache / MAX_CACHE_SIZE are declared but never used
# anywhere in this file — confirm whether result caching was dropped.
results_cache: Dict[str, Dict] = {}
MAX_CACHE_SIZE = 20
|
|
|
|
| |
| |
| |
|
|
def image_to_base64_url(image_path: str) -> str:
    """Encode an image file as a base64 ``data:`` URL.

    Args:
        image_path: Path to the image on disk.

    Returns:
        A ``data:<mime>;base64,<payload>`` string. PNG, GIF and WebP are
        recognized by file extension; everything else is labeled JPEG.

    Raises:
        Exception: Any read/encode failure is logged and re-raised.
    """
    try:
        with open(image_path, "rb") as fh:
            raw_bytes = fh.read()

        # Map known extensions to MIME types; JPEG is the fallback.
        lowered = image_path.lower()
        mime_type = "image/jpeg"
        for suffix, candidate in (
            (".png", "image/png"),
            (".gif", "image/gif"),
            (".webp", "image/webp"),
        ):
            if lowered.endswith(suffix):
                mime_type = candidate
                break

        payload = base64.b64encode(raw_bytes).decode("utf-8")
        return f"data:{mime_type};base64,{payload}"
    except Exception as e:
        logger.error(f"Failed to encode image: {e}")
        raise
|
|
|
|
def get_image_hash(image_path: str) -> str:
    """Return a short (12-hex-char) MD5 fingerprint of an image file.

    The hash is used only as a cache key, so MD5's cryptographic weakness
    is irrelevant here. When the file cannot be read, a time-derived
    pseudo-hash is returned so the caller still gets a usable key.
    """
    try:
        with open(image_path, "rb") as f:
            return hashlib.md5(f.read()).hexdigest()[:12]
    except OSError:
        # Narrowed from a bare `except:` — only file-access failures should
        # trigger the fallback, not e.g. KeyboardInterrupt.
        return hashlib.md5(str(time.time()).encode()).hexdigest()[:12]
|
|
|
|
def find_example_images() -> List[str]:
    """Return sorted example image paths from the ``examples`` folder.

    Scans for common raster formats with both lower- and upper-case
    extensions. Results are de-duplicated: on case-insensitive
    filesystems (macOS, Windows) the two glob passes would otherwise
    match the same file twice.
    """
    matches: List[str] = []
    if os.path.exists("examples"):
        for ext in ["*.jpg", "*.jpeg", "*.png", "*.webp", "*.gif", "*.bmp"]:
            matches.extend(glob.glob(f"examples/{ext}"))
            matches.extend(glob.glob(f"examples/{ext.upper()}"))

    # Dedupe, then sort for a stable example gallery order.
    return sorted(set(matches))
|
|
|
|
| |
| |
| |
|
|
class HFClient:
    """Thin wrapper around the HuggingFace Inference API.

    The underlying ``InferenceClient`` is created lazily on first use so
    that importing this module never requires network access or the
    ``huggingface_hub`` package to be installed.
    """

    def __init__(self, token: Optional[str] = None):
        # token: optional HF API token; None means anonymous access.
        self.token = token
        # Created on first `.client` access, then reused.
        self._client = None

    @property
    def client(self):
        """Lazy initialization of the client."""
        if self._client is None:
            try:
                from huggingface_hub import InferenceClient
                self._client = InferenceClient(token=self.token)
                logger.info("HuggingFace InferenceClient initialized")
            except ImportError:
                logger.error("huggingface_hub not installed")
                raise ImportError("Please install huggingface_hub")
        return self._client

    def extract_text_from_image(self, image_path: str) -> Tuple[str, bool]:
        """
        Extract text from a whiteboard/handwritten-notes image using the
        vision model as an OCR engine.

        Args:
            image_path: Path to the uploaded image file.

        Returns:
            (extracted_text, success). On failure the first element is a
            human-readable error message instead of extracted text.
        """
        try:
            # The image is sent inline as a base64 data URL.
            image_url = image_to_base64_url(image_path)

            messages = [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {"url": image_url}
                        },
                        {
                            "type": "text",
                            # NOTE(review): section markers below contain
                            # mojibake of original emoji — confirm encoding.
                            "text": """You are an expert OCR system specialized in reading whiteboard notes and handwritten text.

Extract ALL text visible in this image. This appears to be meeting notes on a whiteboard or handwritten notes.

Instructions:
1. Read every piece of text you can see, including messy handwriting
2. Preserve the structure (bullet points, numbered lists, sections)
3. If text is unclear, make your best guess and mark it with [?]
4. Include any names, dates, numbers, or action items you see
5. Preserve any arrows, connections, or groupings in your description

Output the extracted text exactly as written, maintaining the original structure as much as possible."""
                        }
                    ]
                }
            ]

            # Low temperature: OCR should be as deterministic as possible.
            response = self.client.chat.completions.create(
                model=VISION_MODEL,
                messages=messages,
                max_tokens=2000,
                temperature=0.1
            )

            result = response.choices[0].message.content
            logger.info(f"OCR extraction successful: {len(result)} chars")
            return result, True

        except Exception as e:
            error_msg = str(e)
            logger.error(f"OCR extraction failed: {error_msg}")

            # Map common API failure modes to friendlier messages.
            if "rate" in error_msg.lower() or "limit" in error_msg.lower():
                return "Rate limited. Please wait a moment and try again.", False
            elif "loading" in error_msg.lower():
                return "Model is loading. Please try again in 30 seconds.", False
            else:
                return f"Text extraction failed: {error_msg[:150]}", False

    def generate_meeting_summary(self, extracted_text: str, meeting_context: str) -> Tuple[str, bool]:
        """
        Generate a structured meeting summary from extracted OCR text.

        Args:
            extracted_text: Raw OCR output (possibly several images joined).
            meeting_context: Optional free-form context typed by the user.

        Returns:
            (summary_markdown, success). On failure the first element is a
            human-readable error message instead of a summary.
        """
        try:
            # Only append the context block when the user actually typed one.
            context_info = f"\nAdditional context: {meeting_context}" if meeting_context.strip() else ""

            prompt = f"""You are an expert meeting notes organizer. Convert the following raw whiteboard/handwritten notes into a clean, professional meeting summary.

RAW EXTRACTED TEXT:
{extracted_text}
{context_info}

Create a structured summary with these sections. Use the EXACT headers shown:

## π Meeting Summary

[2-4 bullet points capturing the main topics discussed]

## β Key Decisions

[List any decisions that were made. If none are clear, write "No explicit decisions captured"]

## π― Action Items

[Create a table with these columns: Action Item | Owner | Due Date | Priority
- Extract any tasks, to-dos, or follow-ups mentioned
- If owner is not specified, write "TBD"
- If due date is not specified, write "TBD"
- Estimate priority as High/Medium/Low based on context
- If no action items found, write "No action items identified"]

## β Items Needing Clarification

[List anything that was unclear or needs follow-up:
- Illegible text that couldn't be read
- Action items missing owners or dates
- Decisions that need confirmation
- If everything is clear, write "None"]

## π Raw Notes (for reference)

[Include a cleaned-up version of the original notes]

IMPORTANT FORMATTING RULES:
- Use bullet points (not numbered lists) for summary items
- Format the Action Items section as a proper markdown table
- Keep the summary concise and professional
- If information is missing, explicitly note it as TBD
- Do not invent information that isn't in the notes"""

            messages = [
                {
                    "role": "user",
                    "content": prompt
                }
            ]

            # The same vision model handles plain-text chat completion.
            response = self.client.chat.completions.create(
                model=VISION_MODEL,
                messages=messages,
                max_tokens=2500,
                temperature=0.3
            )

            result = response.choices[0].message.content
            logger.info(f"Summary generation successful: {len(result)} chars")
            return result, True

        except Exception as e:
            error_msg = str(e)
            logger.error(f"Summary generation failed: {error_msg}")
            return f"Summary generation failed: {error_msg[:150]}", False
|
|
|
|
| |
# Single module-level client shared by all requests.
hf_client = HFClient(token=HF_TOKEN)
|
|
|
|
| |
| |
| |
|
|
def create_word_document(summary_text: str, extracted_text: str) -> Optional[str]:
    """
    Create a Word document from the markdown meeting summary.

    Parses the summary line-by-line: ``## `` headers become Word headings,
    markdown tables become real tables (buffered until the table ends),
    and ``-``/``*`` bullets become list paragraphs.

    Args:
        summary_text: Markdown summary produced by the model.
        extracted_text: Raw OCR text (currently unused here; parameter kept
            for interface stability).

    Returns:
        Path of the saved .docx, or None when python-docx is missing or
        any error occurs — export is strictly best-effort.
    """
    try:
        import tempfile

        from docx import Document
        from docx.shared import Pt
        from docx.enum.text import WD_ALIGN_PARAGRAPH

        doc = Document()

        # Base font for the whole document.
        style = doc.styles['Normal']
        style.font.name = 'Arial'
        style.font.size = Pt(11)

        # Centered title + generation timestamp.
        title = doc.add_heading('Meeting Notes Summary', 0)
        title.alignment = WD_ALIGN_PARAGRAPH.CENTER

        date_para = doc.add_paragraph()
        date_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
        date_run = date_para.add_run(f"Generated: {datetime.now().strftime('%B %d, %Y at %I:%M %p')}")
        date_run.font.size = Pt(10)
        date_run.font.italic = True

        doc.add_paragraph()

        # Table rows are accumulated until the table ends (blank line or a
        # new section header), then emitted in one shot.
        lines = summary_text.split('\n')
        table_started = False
        table_rows = []

        for line in lines:
            line = line.strip()
            if not line:
                if table_started and table_rows:
                    add_table_to_doc(doc, table_rows)
                    table_rows = []
                    table_started = False
                continue

            # '## ' section header -> level-1 heading (symbols stripped).
            if line.startswith('## '):
                if table_started and table_rows:
                    add_table_to_doc(doc, table_rows)
                    table_rows = []
                    table_started = False

                header_text = re.sub(r'[^\w\s\-\(\)]', '', line[3:]).strip()
                doc.add_heading(header_text, level=1)
                continue

            # Header row of the action-items table.
            if '|' in line and 'Action Item' in line:
                table_started = True
                headers = [h.strip() for h in line.split('|') if h.strip()]
                table_rows.append(headers)
                continue

            # Skip the markdown separator row (|---|:---|...).
            if table_started and line.replace('|', '').replace('-', '').replace(':', '').strip() == '':
                continue

            # Data row of the table.
            if table_started and '|' in line:
                cells = [c.strip() for c in line.split('|') if c.strip()]
                if cells:
                    table_rows.append(cells)
                continue

            # Markdown bullets.
            if line.startswith('- ') or line.startswith('* '):
                doc.add_paragraph(line[2:], style='List Bullet')
                continue

            # Plain paragraph text (other header levels are ignored).
            if line and not line.startswith('#'):
                doc.add_paragraph(line)

        # Flush a table that ran to the end of the document.
        if table_started and table_rows:
            add_table_to_doc(doc, table_rows)

        # Save to the platform temp directory instead of a hardcoded /tmp,
        # which does not exist on Windows.
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        filepath = os.path.join(tempfile.gettempdir(), f"meeting_notes_{timestamp}.docx")
        doc.save(filepath)
        logger.info(f"Word document created: {filepath}")
        return filepath

    except ImportError:
        logger.warning("python-docx not installed, skipping Word document creation")
        return None
    except Exception as e:
        logger.error(f"Failed to create Word document: {e}")
        return None
|
|
|
|
def add_table_to_doc(doc, rows: List[List[str]]):
    """Append a grid-styled, centered table to *doc*.

    The first row is treated as the header: its runs are bolded and set
    to 10pt. Short rows simply leave trailing cells empty.
    """
    if not rows:
        return

    from docx.shared import Pt
    from docx.enum.table import WD_TABLE_ALIGNMENT

    # Width is the longest row; shorter rows leave trailing cells blank.
    width = max(len(r) for r in rows)
    table = doc.add_table(rows=len(rows), cols=width)
    table.style = 'Table Grid'
    table.alignment = WD_TABLE_ALIGNMENT.CENTER

    for row_index, cell_texts in enumerate(rows):
        doc_row = table.rows[row_index]
        for col_index, text in enumerate(cell_texts):
            if col_index >= width:
                continue
            cell = doc_row.cells[col_index]
            cell.text = text

            # Header row styling: bold, slightly smaller font.
            if row_index == 0:
                for paragraph in cell.paragraphs:
                    for run in paragraph.runs:
                        run.font.bold = True
                        run.font.size = Pt(10)

    # Spacer paragraph after the table.
    doc.add_paragraph()
|
|
|
|
| |
| |
| |
|
|
# Sessions idle longer than this (seconds) are evicted from the timestamp
# map so it cannot grow without bound over the app's lifetime.
SESSION_TTL_SECONDS = 3600


def check_rate_limit(session_id: str) -> Tuple[bool, str]:
    """Enforce a minimum interval between requests for one session.

    Args:
        session_id: Opaque per-browser-session identifier.

    Returns:
        ``(True, "")`` when the request may proceed, otherwise
        ``(False, message)`` with a human-readable wait notice.
    """
    now = time.time()
    last = session_timestamps.get(session_id, 0)

    if now - last < MIN_REQUEST_INTERVAL:
        wait = MIN_REQUEST_INTERVAL - (now - last)
        # Round UP so users are never told to wait "0 seconds".
        wait_display = max(1, int(wait) + (1 if wait % 1 else 0))
        return False, f"Please wait {wait_display} seconds before trying again."

    # Opportunistically prune long-idle sessions — fixes the unbounded
    # growth of the module-level dict.
    stale = [sid for sid, ts in session_timestamps.items() if now - ts > SESSION_TTL_SECONDS]
    for sid in stale:
        del session_timestamps[sid]

    session_timestamps[session_id] = now
    return True, ""
|
|
|
|
| |
| |
| |
|
|
def process_whiteboard_images(
    images: List[str],
    meeting_context: str,
    session_id: str
) -> Tuple[str, str, str, Optional[str]]:
    """
    Main pipeline: process whiteboard images -> extract text -> generate summary.

    Args:
        images: Filepaths of uploaded photos (entries may be None).
        meeting_context: Optional free-form meeting description.
        session_id: Per-browser session key used for rate limiting.

    Returns:
        (status, extracted_text, summary, docx_filepath). ``docx_filepath``
        is None when document export fails or on any earlier error.
    """
    # Fall back to a shared bucket when the UI did not supply a session id.
    if not session_id:
        session_id = "default"

    # Per-session throttle protects the free inference quota.
    rate_ok, rate_msg = check_rate_limit(session_id)
    if not rate_ok:
        return f"β³ {rate_msg}", "", "", None

    if not images or len(images) == 0:
        return "β Please upload at least one image of whiteboard notes.", "", "", None

    # Drop empty slots coming from the multi-upload widget.
    valid_images = [img for img in images if img is not None]
    if not valid_images:
        return "β No valid images found. Please upload whiteboard photos.", "", "", None

    logger.info(f"Processing {len(valid_images)} image(s)")

    # --- Stage 1: OCR every image; abort on the first failure. ---
    all_extracted_text = []

    for idx, image_path in enumerate(valid_images):
        status_msg = f"π Extracting text from image {idx + 1} of {len(valid_images)}..."
        logger.info(status_msg)

        extracted, success = hf_client.extract_text_from_image(image_path)

        if not success:
            return f"β Failed to process image {idx + 1}: {extracted}", "", "", None

        # Label each chunk only when there are multiple images.
        if len(valid_images) > 1:
            all_extracted_text.append(f"=== Image {idx + 1} ===\n{extracted}")
        else:
            all_extracted_text.append(extracted)

    combined_text = "\n\n".join(all_extracted_text)

    if not combined_text.strip():
        return "β Could not extract any text from the images. Please ensure the notes are visible.", "", "", None

    # --- Stage 2: turn the raw OCR text into a structured summary. ---
    logger.info("Generating meeting summary...")

    summary, success = hf_client.generate_meeting_summary(combined_text, meeting_context)

    if not success:
        # Still return the extracted text so the OCR result is not lost.
        return f"β Failed to generate summary: {summary}", combined_text, "", None

    # --- Stage 3: best-effort Word export (None if python-docx missing). ---
    docx_path = create_word_document(summary, combined_text)

    # NOTE(review): the leading character below is mojibake of an emoji
    # (likely a check mark) — confirm the file's original encoding.
    status = f"β Successfully processed {len(valid_images)} image(s)"

    return status, combined_text, summary, docx_path
|
|
|
|
| |
| |
| |
|
|
# Suggested values rendered under the meeting-context textbox.
# (Bullet characters are mojibake of the original encoding — preserved
# byte-for-byte because this is a runtime string.)
EXAMPLE_CONTEXT = """Example contexts:
β’ "Weekly team standup - Engineering"
β’ "Product roadmap planning Q2"
β’ "Client meeting - Project Alpha kickoff"
β’ "Brainstorming session - New feature ideas"
"""
|
|
def create_interface():
    """Create and configure the Gradio interface.

    Returns:
        The root ``gr.Blocks`` application, ready to ``queue``/``launch``.
    """
    with gr.Blocks(
        title="Whiteboard Notes β Meeting Summary"
    ) as app:

        # Random per-browser-session id; feeds the rate limiter.
        session = gr.State(lambda: hashlib.md5(str(time.time()).encode()).hexdigest()[:8])

        # Page header (markdown kept flush-left so rendering never depends
        # on Gradio's dedent behavior).
        gr.Markdown("""
# π Whiteboard Notes β Meeting Summary

**Made by :- Yash Chowdhary**

**Transform messy whiteboard photos into clean, actionable meeting notes!**

Upload photos of your whiteboard or handwritten meeting notes. The AI will:
1. π Extract all text using advanced OCR
2. π Organize into a structured summary
3. β Identify action items, owners, and due dates
4. π Generate a downloadable Word document

> Perfect for pasting into Slack, Notion, or sending via email.
""")

        with gr.Row():
            # ----- Left column: inputs -----
            with gr.Column(scale=1):
                image_input = gr.Image(
                    label="πΈ Upload Whiteboard Photo",
                    type="filepath",
                    height=250,
                    sources=["upload", "clipboard"]
                )

                # Optional batch upload, collapsed by default.
                with gr.Accordion("π Upload Multiple Photos (Optional)", open=False):
                    multi_image_input = gr.File(
                        label="Select multiple whiteboard photos",
                        file_count="multiple",
                        file_types=["image"],
                        type="filepath"
                    )
                    gr.Markdown("*Upload multiple photos here if you have more than one whiteboard to process*")

                context_input = gr.Textbox(
                    label="π Meeting Context (Optional)",
                    placeholder="e.g., Weekly team standup, Project kickoff, Brainstorming session...",
                    lines=2,
                    max_lines=3
                )

                gr.Markdown(EXAMPLE_CONTEXT)

                process_btn = gr.Button(
                    "π Process Notes",
                    variant="primary",
                    size="lg"
                )

                gr.Markdown("### πΈ Try an Example")

                # Only render the examples gallery when files exist on disk.
                example_images = find_example_images()
                if example_images:
                    gr.Examples(
                        examples=example_images,
                        inputs=image_input,
                        label="Click an image to try it",
                        examples_per_page=4
                    )
                else:
                    gr.Markdown("*No example images found in examples/ folder*")

                gr.Markdown("""
---
**π‘ Tips for Best Results:**
- Use good lighting to capture the whiteboard
- Ensure text is in focus and readable
- Include the full whiteboard in the frame
- For multiple photos, use the "Upload Multiple Photos" section
""")

            # ----- Right column: outputs -----
            with gr.Column(scale=2):
                status_output = gr.Textbox(
                    label="Status",
                    interactive=False,
                    max_lines=2
                )

                with gr.Tabs():
                    with gr.TabItem("π Meeting Summary"):
                        summary_output = gr.Markdown(
                            label="Meeting Summary",
                            value="*Upload whiteboard photos to generate summary...*"
                        )

                    with gr.TabItem("π€ Extracted Text"):
                        extracted_output = gr.Textbox(
                            label="Raw Extracted Text",
                            lines=15,
                            interactive=False
                        )

                    with gr.TabItem("π₯ Download"):
                        gr.Markdown("### Download Your Meeting Notes")
                        docx_output = gr.File(
                            label="π Word Document (.docx)",
                            interactive=False
                        )
                        gr.Markdown("""
*The Word document contains the formatted meeting summary,
ready to share or archive.*
""")

                # Plain-text variant for easy pasting into chat tools.
                with gr.Accordion("π Copy-Paste Ready (for Slack/Notion)", open=False):
                    gr.Markdown("Select all text below (Ctrl+A) and copy (Ctrl+C) for Slack or Notion:")
                    copy_output = gr.Textbox(
                        label="Plain Text Summary",
                        lines=10,
                        interactive=False
                    )

        gr.Markdown("""
---
**How It Works:**
1. πΈ Upload one or more photos of whiteboard/handwritten notes
2. π€ AI extracts text using advanced vision models (handles messy handwriting!)
3. π Text is analyzed and organized into structured meeting notes
4. β Action items are identified with owners and due dates
5. π Download as Word document or copy to clipboard

*Powered by HuggingFace Vision-Language Models and love from Yash Chowdhary*
""")

        def on_process(single_image, multi_images, context, session_id):
            # Merge the single-image widget and the multi-file widget into
            # one list of filepaths for the pipeline.
            image_list = []

            if single_image is not None:
                image_list.append(single_image)

            if multi_images is not None:
                if isinstance(multi_images, list):
                    image_list.extend([img for img in multi_images if img is not None])
                else:
                    image_list.append(multi_images)

            status, extracted, summary, docx_path = process_whiteboard_images(
                image_list, context, session_id
            )

            # Strip markdown markers for the copy-paste friendly box.
            plain_summary = summary.replace('## ', '\n').replace('**', '').replace('*', '')

            return status, extracted, summary, docx_path, plain_summary

        process_btn.click(
            fn=on_process,
            inputs=[image_input, multi_image_input, context_input, session],
            outputs=[status_output, extracted_output, summary_output, docx_output, copy_output]
        )

    return app
|
|
|
|
| |
| |
| |
|
|
| |
# Build the UI at import time — Hugging Face Spaces imports this module
# and serves `demo` directly.
demo = create_interface()

# Small queue + low concurrency to fit the free CPU tier.
demo.queue(max_size=10, default_concurrency_limit=2)

# Local / container entry point (Spaces binds 0.0.0.0:7860 by convention).
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True
    )