Spaces:

broadfield-dev
/

markdown2png

Paused

App Files Files Community

broadfield-dev committed on Dec 22, 2025

Commit

d23a27f

verified ·

1 Parent(s): 5b80a53

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -16

app.py CHANGED Viewed

@@ -17,27 +17,33 @@ os.makedirs(TEMP_DIR, exist_ok=True)
 def parse_repo2markdown(text):
     components = []
     pattern = re.compile(r'### File: (.*?)\n([\s\S]*?)(?=\n### File:|\Z)', re.MULTILINE)
-    first_match = pattern.search(text)
-    if first_match:
-        intro_text = text[:first_match.start()].strip()
-        if intro_text:
-            components.append({'type': 'intro', 'filename': 'Introduction', 'content': intro_text})
     for match in pattern.finditer(text):
         filename = match.group(1).strip()
-        raw_content = match.group(2).strip()
-        code_match = re.search(r'^```(\w*)\s*\n([\s\S]*?)\s*```$', raw_content, re.DOTALL)
-        if code_match:
-            components.append({'type': 'file', 'filename': filename, 'content': code_match.group(2).strip(), 'is_code_block': True, 'language': code_match.group(1)})
-        else:
-            components.append({'type': 'file', 'filename': filename, 'content': raw_content, 'is_code_block': False})
     return components
 def parse_standard_readme(text):
     components = []
     parts = re.split(r'^(## .*?)$', text, flags=re.MULTILINE)
     if parts[0].strip():
-        components.append({'type': 'intro', 'filename': 'Header', 'content': parts[0].strip()})
     for i in range(1, len(parts), 2):
         components.append({'type': 'section', 'filename': parts[i].replace('##', '').strip(), 'content': parts[i+1].strip()})
     return components
@@ -103,18 +109,23 @@ def parse_endpoint():
     if 'markdown_file' in request.files and request.files['markdown_file'].filename != '':
         text = request.files['markdown_file'].read().decode('utf-8')
     try:
         if "### HF_ACTION:" in text or "File and Code Formatting:" in text:
             format_name, components = "Agent Action", parse_agent_action(text)
         elif "## File Structure" in text and "### File:" in text:
             format_name, components = "Repo2Markdown", parse_repo2markdown(text)
         elif re.search(r'^## \[\d+\.\d+\.\d+.*?\].*?$', text, flags=re.MULTILINE):
             format_name, components = "Changelog", parse_changelog(text)
-        elif text.strip().startswith("#") and re.search(r'^## ', text, flags=re.MULTILINE):
             format_name, components = "Standard README", parse_standard_readme(text)
         else:
             format_name, components = "Unknown", [{'type': 'text', 'filename': 'Full Text', 'content': text}]
         return jsonify({'format': format_name, 'components': components})
     except Exception as e:
         return jsonify({'error': str(e)}), 500
 @app.route('/convert', methods=['POST'])
@@ -153,8 +164,10 @@ def index():
             .btn-dark { background: #333; color: #fff; }
             .preview-container { background: #fff; border: 1px solid #ddd; margin-top: 20px; padding: 15px; overflow: auto; }
             .component-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(280px, 1fr)); gap: 15px; margin-top: 10px; }
-            .comp-card { background: #fff; border: 1px solid #ddd; padding: 10px; border-radius: 4px; }
             .dark-mode .comp-card { background: #333; border-color: #444; }
         </style>
     </head>
     <body>
@@ -213,7 +226,14 @@ def index():
                 if(data.error) return alert(data.error);
                 const list = document.getElementById('comp-list'); list.innerHTML = '';
                 data.components.forEach((c, i) => {
-                    list.innerHTML += `<div class="comp-card"><input type="checkbox" checked class="c-check" data-content="${btoa(unescape(encodeURIComponent(c.content)))}"> <strong>${c.filename}</strong><br><small>${c.type}</small></div>`;
                 });
                 document.getElementById('comp-section').style.display = 'block';
             }
@@ -221,7 +241,17 @@ def index():
                 let text = "";
                 const checks = document.querySelectorAll('.c-check');
                 if(checks.length > 0) {
-                    checks.forEach(c => { if(c.checked) text += decodeURIComponent(escape(atob(c.dataset.content))) + "\\n\\n"; });
                 } else { text = document.getElementById('md-input').value; }
                 return {
                     markdown_text: text,

 def parse_repo2markdown(text):
     components = []
+    # Improved pattern to capture content between file headers more reliably
     pattern = re.compile(r'### File: (.*?)\n([\s\S]*?)(?=\n### File:|\Z)', re.MULTILINE)
+    # Capture intro/file structure as the first component if present
+    struct_match = re.search(r'## File Structure\n([\s\S]*?)(?=\n### File:|\Z)', text)
+    if struct_match:
+        components.append({'type': 'structure', 'filename': 'File Structure', 'content': struct_match.group(1).strip()})
     for match in pattern.finditer(text):
         filename = match.group(1).strip()
+        content = match.group(2).strip()
+        # Clean up code blocks if they wrap the entire file content
+        code_match = re.search(r'^```(?:\w*)\s*\n([\s\S]*?)\s*```$', content, re.DOTALL)
+        final_content = code_match.group(1).strip() if code_match else content
+        components.append({
+            'type': 'file',
+            'filename': filename,
+            'content': final_content,
+            'is_code_block': True if code_match else False
+        })
     return components
 def parse_standard_readme(text):
     components = []
     parts = re.split(r'^(## .*?)$', text, flags=re.MULTILINE)
     if parts[0].strip():
+        components.append({'type': 'intro', 'filename': 'Header/Intro', 'content': parts[0].strip()})
     for i in range(1, len(parts), 2):
         components.append({'type': 'section', 'filename': parts[i].replace('##', '').strip(), 'content': parts[i+1].strip()})
     return components
     if 'markdown_file' in request.files and request.files['markdown_file'].filename != '':
         text = request.files['markdown_file'].read().decode('utf-8')
     try:
+        # Priority 1: Agent Action
         if "### HF_ACTION:" in text or "File and Code Formatting:" in text:
             format_name, components = "Agent Action", parse_agent_action(text)
+        # Priority 2: Repo2Markdown (Checking for File Structure and Files anywhere in text)
         elif "## File Structure" in text and "### File:" in text:
             format_name, components = "Repo2Markdown", parse_repo2markdown(text)
+        # Priority 3: Changelog
         elif re.search(r'^## \[\d+\.\d+\.\d+.*?\].*?$', text, flags=re.MULTILINE):
             format_name, components = "Changelog", parse_changelog(text)
+        # Priority 4: Standard README (Relaxed start anchor to handle chatbot intros)
+        elif re.search(r'^# ', text, flags=re.MULTILINE) and re.search(r'^## ', text, flags=re.MULTILINE):
             format_name, components = "Standard README", parse_standard_readme(text)
         else:
             format_name, components = "Unknown", [{'type': 'text', 'filename': 'Full Text', 'content': text}]
         return jsonify({'format': format_name, 'components': components})
     except Exception as e:
+        traceback.print_exc()
         return jsonify({'error': str(e)}), 500
 @app.route('/convert', methods=['POST'])
             .btn-dark { background: #333; color: #fff; }
             .preview-container { background: #fff; border: 1px solid #ddd; margin-top: 20px; padding: 15px; overflow: auto; }
             .component-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(280px, 1fr)); gap: 15px; margin-top: 10px; }
+            .comp-card { background: #fff; border: 1px solid #ddd; padding: 10px; border-radius: 4px; overflow: hidden; }
             .dark-mode .comp-card { background: #333; border-color: #444; }
+            .comp-card textarea { height: 60px; font-size: 10px; background: #fafafa; }
+            .dark-mode .comp-card textarea { background: #222; }
         </style>
     </head>
     <body>
                 if(data.error) return alert(data.error);
                 const list = document.getElementById('comp-list'); list.innerHTML = '';
                 data.components.forEach((c, i) => {
+                    const safeContent = btoa(unescape(encodeURIComponent(c.content)));
+                    list.innerHTML += `
+                        <div class="comp-card">
+                            <input type="checkbox" checked class="c-check" data-filename="${c.filename}" data-content="${safeContent}">
+                            <strong>${c.filename}</strong><br>
+                            <small>${c.type}</small>
+                            <textarea readonly>${c.content.substring(0, 100)}...</textarea>
+                        </div>`;
                 });
                 document.getElementById('comp-section').style.display = 'block';
             }
                 let text = "";
                 const checks = document.querySelectorAll('.c-check');
                 if(checks.length > 0) {
+                    checks.forEach(c => {
+                        if(c.checked) {
+                            const content = decodeURIComponent(escape(atob(c.dataset.content)));
+                            const filename = c.dataset.filename;
+                            // Reconstruct header based on presence of file-like names
+                            if (filename !== "File Structure" && filename !== "Header/Intro") {
+                                text += "### File: " + filename + "\\n";
+                            }
+                            text += content + "\\n\\n";
+                        }
+                    });
                 } else { text = document.getElementById('md-input').value; }
                 return {
                     markdown_text: text,