broadfield-dev committed on
Commit
d23a27f
·
verified ·
1 Parent(s): 5b80a53

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -16
app.py CHANGED
@@ -17,27 +17,33 @@ os.makedirs(TEMP_DIR, exist_ok=True)
17
 
18
  def parse_repo2markdown(text):
19
  components = []
 
20
  pattern = re.compile(r'### File: (.*?)\n([\s\S]*?)(?=\n### File:|\Z)', re.MULTILINE)
21
- first_match = pattern.search(text)
22
- if first_match:
23
- intro_text = text[:first_match.start()].strip()
24
- if intro_text:
25
- components.append({'type': 'intro', 'filename': 'Introduction', 'content': intro_text})
 
26
  for match in pattern.finditer(text):
27
  filename = match.group(1).strip()
28
- raw_content = match.group(2).strip()
29
- code_match = re.search(r'^```(\w*)\s*\n([\s\S]*?)\s*```$', raw_content, re.DOTALL)
30
- if code_match:
31
- components.append({'type': 'file', 'filename': filename, 'content': code_match.group(2).strip(), 'is_code_block': True, 'language': code_match.group(1)})
32
- else:
33
- components.append({'type': 'file', 'filename': filename, 'content': raw_content, 'is_code_block': False})
 
 
 
 
34
  return components
35
 
36
  def parse_standard_readme(text):
37
  components = []
38
  parts = re.split(r'^(## .*?)$', text, flags=re.MULTILINE)
39
  if parts[0].strip():
40
- components.append({'type': 'intro', 'filename': 'Header', 'content': parts[0].strip()})
41
  for i in range(1, len(parts), 2):
42
  components.append({'type': 'section', 'filename': parts[i].replace('##', '').strip(), 'content': parts[i+1].strip()})
43
  return components
@@ -103,18 +109,23 @@ def parse_endpoint():
103
  if 'markdown_file' in request.files and request.files['markdown_file'].filename != '':
104
  text = request.files['markdown_file'].read().decode('utf-8')
105
  try:
 
106
  if "### HF_ACTION:" in text or "File and Code Formatting:" in text:
107
  format_name, components = "Agent Action", parse_agent_action(text)
 
108
  elif "## File Structure" in text and "### File:" in text:
109
  format_name, components = "Repo2Markdown", parse_repo2markdown(text)
 
110
  elif re.search(r'^## \[\d+\.\d+\.\d+.*?\].*?$', text, flags=re.MULTILINE):
111
  format_name, components = "Changelog", parse_changelog(text)
112
- elif text.strip().startswith("#") and re.search(r'^## ', text, flags=re.MULTILINE):
 
113
  format_name, components = "Standard README", parse_standard_readme(text)
114
  else:
115
  format_name, components = "Unknown", [{'type': 'text', 'filename': 'Full Text', 'content': text}]
116
  return jsonify({'format': format_name, 'components': components})
117
  except Exception as e:
 
118
  return jsonify({'error': str(e)}), 500
119
 
120
  @app.route('/convert', methods=['POST'])
@@ -153,8 +164,10 @@ def index():
153
  .btn-dark { background: #333; color: #fff; }
154
  .preview-container { background: #fff; border: 1px solid #ddd; margin-top: 20px; padding: 15px; overflow: auto; }
155
  .component-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(280px, 1fr)); gap: 15px; margin-top: 10px; }
156
- .comp-card { background: #fff; border: 1px solid #ddd; padding: 10px; border-radius: 4px; }
157
  .dark-mode .comp-card { background: #333; border-color: #444; }
 
 
158
  </style>
159
  </head>
160
  <body>
@@ -213,7 +226,14 @@ def index():
213
  if(data.error) return alert(data.error);
214
  const list = document.getElementById('comp-list'); list.innerHTML = '';
215
  data.components.forEach((c, i) => {
216
- list.innerHTML += `<div class="comp-card"><input type="checkbox" checked class="c-check" data-content="${btoa(unescape(encodeURIComponent(c.content)))}"> <strong>${c.filename}</strong><br><small>${c.type}</small></div>`;
 
 
 
 
 
 
 
217
  });
218
  document.getElementById('comp-section').style.display = 'block';
219
  }
@@ -221,7 +241,17 @@ def index():
221
  let text = "";
222
  const checks = document.querySelectorAll('.c-check');
223
  if(checks.length > 0) {
224
- checks.forEach(c => { if(c.checked) text += decodeURIComponent(escape(atob(c.dataset.content))) + "\\n\\n"; });
 
 
 
 
 
 
 
 
 
 
225
  } else { text = document.getElementById('md-input').value; }
226
  return {
227
  markdown_text: text,
 
17
 
18
  def parse_repo2markdown(text):
19
  components = []
20
+ # Improved pattern to capture content between file headers more reliably
21
  pattern = re.compile(r'### File: (.*?)\n([\s\S]*?)(?=\n### File:|\Z)', re.MULTILINE)
22
+
23
+ # Capture intro/file structure as the first component if present
24
+ struct_match = re.search(r'## File Structure\n([\s\S]*?)(?=\n### File:|\Z)', text)
25
+ if struct_match:
26
+ components.append({'type': 'structure', 'filename': 'File Structure', 'content': struct_match.group(1).strip()})
27
+
28
  for match in pattern.finditer(text):
29
  filename = match.group(1).strip()
30
+ content = match.group(2).strip()
31
+ # Clean up code blocks if they wrap the entire file content
32
+ code_match = re.search(r'^```(?:\w*)\s*\n([\s\S]*?)\s*```$', content, re.DOTALL)
33
+ final_content = code_match.group(1).strip() if code_match else content
34
+ components.append({
35
+ 'type': 'file',
36
+ 'filename': filename,
37
+ 'content': final_content,
38
+ 'is_code_block': True if code_match else False
39
+ })
40
  return components
41
 
42
  def parse_standard_readme(text):
43
  components = []
44
  parts = re.split(r'^(## .*?)$', text, flags=re.MULTILINE)
45
  if parts[0].strip():
46
+ components.append({'type': 'intro', 'filename': 'Header/Intro', 'content': parts[0].strip()})
47
  for i in range(1, len(parts), 2):
48
  components.append({'type': 'section', 'filename': parts[i].replace('##', '').strip(), 'content': parts[i+1].strip()})
49
  return components
 
109
  if 'markdown_file' in request.files and request.files['markdown_file'].filename != '':
110
  text = request.files['markdown_file'].read().decode('utf-8')
111
  try:
112
+ # Priority 1: Agent Action
113
  if "### HF_ACTION:" in text or "File and Code Formatting:" in text:
114
  format_name, components = "Agent Action", parse_agent_action(text)
115
+ # Priority 2: Repo2Markdown (Checking for File Structure and Files anywhere in text)
116
  elif "## File Structure" in text and "### File:" in text:
117
  format_name, components = "Repo2Markdown", parse_repo2markdown(text)
118
+ # Priority 3: Changelog
119
  elif re.search(r'^## \[\d+\.\d+\.\d+.*?\].*?$', text, flags=re.MULTILINE):
120
  format_name, components = "Changelog", parse_changelog(text)
121
+ # Priority 4: Standard README (Relaxed start anchor to handle chatbot intros)
122
+ elif re.search(r'^# ', text, flags=re.MULTILINE) and re.search(r'^## ', text, flags=re.MULTILINE):
123
  format_name, components = "Standard README", parse_standard_readme(text)
124
  else:
125
  format_name, components = "Unknown", [{'type': 'text', 'filename': 'Full Text', 'content': text}]
126
  return jsonify({'format': format_name, 'components': components})
127
  except Exception as e:
128
+ traceback.print_exc()
129
  return jsonify({'error': str(e)}), 500
130
 
131
  @app.route('/convert', methods=['POST'])
 
164
  .btn-dark { background: #333; color: #fff; }
165
  .preview-container { background: #fff; border: 1px solid #ddd; margin-top: 20px; padding: 15px; overflow: auto; }
166
  .component-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(280px, 1fr)); gap: 15px; margin-top: 10px; }
167
+ .comp-card { background: #fff; border: 1px solid #ddd; padding: 10px; border-radius: 4px; overflow: hidden; }
168
  .dark-mode .comp-card { background: #333; border-color: #444; }
169
+ .comp-card textarea { height: 60px; font-size: 10px; background: #fafafa; }
170
+ .dark-mode .comp-card textarea { background: #222; }
171
  </style>
172
  </head>
173
  <body>
 
226
  if(data.error) return alert(data.error);
227
  const list = document.getElementById('comp-list'); list.innerHTML = '';
228
  data.components.forEach((c, i) => {
229
+ const safeContent = btoa(unescape(encodeURIComponent(c.content)));
230
+ list.innerHTML += `
231
+ <div class="comp-card">
232
+ <input type="checkbox" checked class="c-check" data-filename="${c.filename}" data-content="${safeContent}">
233
+ <strong>${c.filename}</strong><br>
234
+ <small>${c.type}</small>
235
+ <textarea readonly>${c.content.substring(0, 100)}...</textarea>
236
+ </div>`;
237
  });
238
  document.getElementById('comp-section').style.display = 'block';
239
  }
 
241
  let text = "";
242
  const checks = document.querySelectorAll('.c-check');
243
  if(checks.length > 0) {
244
+ checks.forEach(c => {
245
+ if(c.checked) {
246
+ const content = decodeURIComponent(escape(atob(c.dataset.content)));
247
+ const filename = c.dataset.filename;
248
+ // Reconstruct header based on presence of file-like names
249
+ if (filename !== "File Structure" && filename !== "Header/Intro") {
250
+ text += "### File: " + filename + "\\n";
251
+ }
252
+ text += content + "\\n\\n";
253
+ }
254
+ });
255
  } else { text = document.getElementById('md-input').value; }
256
  return {
257
  markdown_text: text,