Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -17,27 +17,33 @@ os.makedirs(TEMP_DIR, exist_ok=True)
|
|
| 17 |
|
| 18 |
def parse_repo2markdown(text):
|
| 19 |
components = []
|
|
|
|
| 20 |
pattern = re.compile(r'### File: (.*?)\n([\s\S]*?)(?=\n### File:|\Z)', re.MULTILINE)
|
| 21 |
-
|
| 22 |
-
if
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
|
|
|
| 26 |
for match in pattern.finditer(text):
|
| 27 |
filename = match.group(1).strip()
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
return components
|
| 35 |
|
| 36 |
def parse_standard_readme(text):
    """Split a README-style markdown document into its level-2 sections.

    The text is cut at every line that starts with ``## ``. Anything before
    the first such heading becomes a single ``intro`` component; every heading
    afterwards becomes a ``section`` component whose ``filename`` is the
    heading title and whose ``content`` is the text up to the next heading.

    Args:
        text: The full markdown document.

    Returns:
        A list of dicts with the keys ``type``, ``filename`` and ``content``.
        The list is empty when ``text`` is empty or whitespace-only.
    """
    components = []
    # With a capturing group, re.split returns
    # [preamble, heading1, body1, heading2, body2, ...] -- always odd length,
    # so headings and bodies pair up exactly.
    parts = re.split(r'^(## .*?)$', text, flags=re.MULTILINE)

    preamble = parts[0].strip()
    if preamble:
        components.append({'type': 'intro', 'filename': 'Header', 'content': preamble})

    for heading, body in zip(parts[1::2], parts[2::2]):
        # Drop only the leading "##" marker. Replacing every "##" would also
        # delete occurrences inside the title itself.
        title = heading[2:].strip()
        components.append({'type': 'section', 'filename': title, 'content': body.strip()})
    return components
|
|
@@ -103,18 +109,23 @@ def parse_endpoint():
|
|
| 103 |
if 'markdown_file' in request.files and request.files['markdown_file'].filename != '':
|
| 104 |
text = request.files['markdown_file'].read().decode('utf-8')
|
| 105 |
try:
|
|
|
|
| 106 |
if "### HF_ACTION:" in text or "File and Code Formatting:" in text:
|
| 107 |
format_name, components = "Agent Action", parse_agent_action(text)
|
|
|
|
| 108 |
elif "## File Structure" in text and "### File:" in text:
|
| 109 |
format_name, components = "Repo2Markdown", parse_repo2markdown(text)
|
|
|
|
| 110 |
elif re.search(r'^## \[\d+\.\d+\.\d+.*?\].*?$', text, flags=re.MULTILINE):
|
| 111 |
format_name, components = "Changelog", parse_changelog(text)
|
| 112 |
-
|
|
|
|
| 113 |
format_name, components = "Standard README", parse_standard_readme(text)
|
| 114 |
else:
|
| 115 |
format_name, components = "Unknown", [{'type': 'text', 'filename': 'Full Text', 'content': text}]
|
| 116 |
return jsonify({'format': format_name, 'components': components})
|
| 117 |
except Exception as e:
|
|
|
|
| 118 |
return jsonify({'error': str(e)}), 500
|
| 119 |
|
| 120 |
@app.route('/convert', methods=['POST'])
|
|
@@ -153,8 +164,10 @@ def index():
|
|
| 153 |
.btn-dark { background: #333; color: #fff; }
|
| 154 |
.preview-container { background: #fff; border: 1px solid #ddd; margin-top: 20px; padding: 15px; overflow: auto; }
|
| 155 |
.component-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(280px, 1fr)); gap: 15px; margin-top: 10px; }
|
| 156 |
-
.comp-card { background: #fff; border: 1px solid #ddd; padding: 10px; border-radius: 4px; }
|
| 157 |
.dark-mode .comp-card { background: #333; border-color: #444; }
|
|
|
|
|
|
|
| 158 |
</style>
|
| 159 |
</head>
|
| 160 |
<body>
|
|
@@ -213,7 +226,14 @@ def index():
|
|
| 213 |
if(data.error) return alert(data.error);
|
| 214 |
const list = document.getElementById('comp-list'); list.innerHTML = '';
|
| 215 |
data.components.forEach((c, i) => {
|
| 216 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
});
|
| 218 |
document.getElementById('comp-section').style.display = 'block';
|
| 219 |
}
|
|
@@ -221,7 +241,17 @@ def index():
|
|
| 221 |
let text = "";
|
| 222 |
const checks = document.querySelectorAll('.c-check');
|
| 223 |
if(checks.length > 0) {
|
| 224 |
-
checks.forEach(c => {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
} else { text = document.getElementById('md-input').value; }
|
| 226 |
return {
|
| 227 |
markdown_text: text,
|
|
|
|
| 17 |
|
| 18 |
# One "### File: <name>" section. The header's newline is optional so that a
# header on the very last line is still recognised, and the terminator is
# anchored with ^ (MULTILINE) so that an empty section directly followed by
# the next header does not swallow that header.
_R2M_FILE_SECTION_RE = re.compile(
    r'### File: (.*?)(?:\n|\Z)([\s\S]*?)(?=^### File:|\Z)', re.MULTILINE
)
# The "## File Structure" block that precedes the file sections.
_R2M_STRUCTURE_RE = re.compile(
    r'## File Structure\n([\s\S]*?)(?=^### File:|\Z)', re.MULTILINE
)
# A fenced code block that wraps the entire (already stripped) content. The
# info string may be any text without backticks, e.g. "python", "c++" or
# "objective-c".
_R2M_WRAPPING_FENCE_RE = re.compile(r'^```[^\n`]*\n([\s\S]*?)\s*```$')


def parse_repo2markdown(text):
    """Split a Repo2Markdown export into its structure block and file sections.

    Args:
        text: The full markdown document. File sections are introduced by
            ``### File: <name>`` lines; an optional ``## File Structure``
            block may precede them.

    Returns:
        A list of component dicts. If a ``## File Structure`` block exists it
        comes first as ``{'type': 'structure', 'filename': 'File Structure',
        'content': ...}``. Each file section follows as ``{'type': 'file',
        'filename': ..., 'content': ..., 'is_code_block': bool}``. When a
        single fenced code block wraps the whole section, ``content`` holds
        the text inside the fence and ``is_code_block`` is True.
    """
    components = []

    # Capture the file-structure overview as the first component if present.
    struct_match = _R2M_STRUCTURE_RE.search(text)
    if struct_match:
        components.append({
            'type': 'structure',
            'filename': 'File Structure',
            'content': struct_match.group(1).strip(),
        })

    for match in _R2M_FILE_SECTION_RE.finditer(text):
        filename = match.group(1).strip()
        content = match.group(2).strip()
        # Unwrap the code fence only when it encloses the entire section.
        code_match = _R2M_WRAPPING_FENCE_RE.search(content)
        final_content = code_match.group(1).strip() if code_match else content
        components.append({
            'type': 'file',
            'filename': filename,
            'content': final_content,
            'is_code_block': code_match is not None,
        })
    return components
|
| 41 |
|
| 42 |
def parse_standard_readme(text):
    """Split a README-style markdown document into its level-2 sections.

    The text is cut at every line that starts with ``## ``. Anything before
    the first such heading becomes a single ``intro`` component; every heading
    afterwards becomes a ``section`` component whose ``filename`` is the
    heading title and whose ``content`` is the text up to the next heading.

    Args:
        text: The full markdown document.

    Returns:
        A list of dicts with the keys ``type``, ``filename`` and ``content``.
        The list is empty when ``text`` is empty or whitespace-only.
    """
    components = []
    # With a capturing group, re.split returns
    # [preamble, heading1, body1, heading2, body2, ...] -- always odd length,
    # so headings and bodies pair up exactly.
    parts = re.split(r'^(## .*?)$', text, flags=re.MULTILINE)

    preamble = parts[0].strip()
    if preamble:
        components.append({'type': 'intro', 'filename': 'Header/Intro', 'content': preamble})

    for heading, body in zip(parts[1::2], parts[2::2]):
        # Drop only the leading "##" marker. Replacing every "##" would also
        # delete occurrences inside the title itself.
        title = heading[2:].strip()
        components.append({'type': 'section', 'filename': title, 'content': body.strip()})
    return components
|
|
|
|
| 109 |
if 'markdown_file' in request.files and request.files['markdown_file'].filename != '':
|
| 110 |
text = request.files['markdown_file'].read().decode('utf-8')
|
| 111 |
try:
|
| 112 |
+
# Priority 1: Agent Action
|
| 113 |
if "### HF_ACTION:" in text or "File and Code Formatting:" in text:
|
| 114 |
format_name, components = "Agent Action", parse_agent_action(text)
|
| 115 |
+
# Priority 2: Repo2Markdown (Checking for File Structure and Files anywhere in text)
|
| 116 |
elif "## File Structure" in text and "### File:" in text:
|
| 117 |
format_name, components = "Repo2Markdown", parse_repo2markdown(text)
|
| 118 |
+
# Priority 3: Changelog
|
| 119 |
elif re.search(r'^## \[\d+\.\d+\.\d+.*?\].*?$', text, flags=re.MULTILINE):
|
| 120 |
format_name, components = "Changelog", parse_changelog(text)
|
| 121 |
+
# Priority 4: Standard README (Relaxed start anchor to handle chatbot intros)
|
| 122 |
+
elif re.search(r'^# ', text, flags=re.MULTILINE) and re.search(r'^## ', text, flags=re.MULTILINE):
|
| 123 |
format_name, components = "Standard README", parse_standard_readme(text)
|
| 124 |
else:
|
| 125 |
format_name, components = "Unknown", [{'type': 'text', 'filename': 'Full Text', 'content': text}]
|
| 126 |
return jsonify({'format': format_name, 'components': components})
|
| 127 |
except Exception as e:
|
| 128 |
+
traceback.print_exc()
|
| 129 |
return jsonify({'error': str(e)}), 500
|
| 130 |
|
| 131 |
@app.route('/convert', methods=['POST'])
|
|
|
|
| 164 |
.btn-dark { background: #333; color: #fff; }
|
| 165 |
.preview-container { background: #fff; border: 1px solid #ddd; margin-top: 20px; padding: 15px; overflow: auto; }
|
| 166 |
.component-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(280px, 1fr)); gap: 15px; margin-top: 10px; }
|
| 167 |
+
.comp-card { background: #fff; border: 1px solid #ddd; padding: 10px; border-radius: 4px; overflow: hidden; }
|
| 168 |
.dark-mode .comp-card { background: #333; border-color: #444; }
|
| 169 |
+
.comp-card textarea { height: 60px; font-size: 10px; background: #fafafa; }
|
| 170 |
+
.dark-mode .comp-card textarea { background: #222; }
|
| 171 |
</style>
|
| 172 |
</head>
|
| 173 |
<body>
|
|
|
|
| 226 |
if(data.error) return alert(data.error);
|
| 227 |
const list = document.getElementById('comp-list'); list.innerHTML = '';
|
| 228 |
data.components.forEach((c, i) => {
|
| 229 |
+
const safeContent = btoa(unescape(encodeURIComponent(c.content)));
|
| 230 |
+
list.innerHTML += `
|
| 231 |
+
<div class="comp-card">
|
| 232 |
+
<input type="checkbox" checked class="c-check" data-filename="${c.filename}" data-content="${safeContent}">
|
| 233 |
+
<strong>${c.filename}</strong><br>
|
| 234 |
+
<small>${c.type}</small>
|
| 235 |
+
<textarea readonly>${c.content.substring(0, 100)}...</textarea>
|
| 236 |
+
</div>`;
|
| 237 |
});
|
| 238 |
document.getElementById('comp-section').style.display = 'block';
|
| 239 |
}
|
|
|
|
| 241 |
let text = "";
|
| 242 |
const checks = document.querySelectorAll('.c-check');
|
| 243 |
if(checks.length > 0) {
|
| 244 |
+
checks.forEach(c => {
|
| 245 |
+
if(c.checked) {
|
| 246 |
+
const content = decodeURIComponent(escape(atob(c.dataset.content)));
|
| 247 |
+
const filename = c.dataset.filename;
|
| 248 |
+
// Reconstruct header based on presence of file-like names
|
| 249 |
+
if (filename !== "File Structure" && filename !== "Header/Intro") {
|
| 250 |
+
text += "### File: " + filename + "\\n";
|
| 251 |
+
}
|
| 252 |
+
text += content + "\\n\\n";
|
| 253 |
+
}
|
| 254 |
+
});
|
| 255 |
} else { text = document.getElementById('md-input').value; }
|
| 256 |
return {
|
| 257 |
markdown_text: text,
|