akhaliq HF Staff committed on
Commit
4ceee61
·
1 Parent(s): 3bbc203

fix requirements txt generation

Browse files
Files changed (1) hide show
  1. backend_deploy.py +255 -6
backend_deploy.py CHANGED
@@ -8,10 +8,12 @@ import json
8
  import uuid
9
  import tempfile
10
  import shutil
 
11
  from typing import Dict, List, Optional, Tuple
12
  from pathlib import Path
13
 
14
  from huggingface_hub import HfApi
 
15
 
16
 
17
  def parse_html_code(code: str) -> str:
@@ -138,6 +140,233 @@ def parse_python_requirements(code: str) -> Optional[str]:
138
  return None
139
 
140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  def parse_multi_file_python_output(code: str) -> Dict[str, str]:
142
  """Parse multi-file Python output (e.g., Gradio, Streamlit)"""
143
  files = {}
@@ -459,12 +688,22 @@ def deploy_to_huggingface_space(
459
  file_path.parent.mkdir(parents=True, exist_ok=True)
460
  file_path.write_text(content, encoding='utf-8')
461
 
462
- # Ensure requirements.txt exists
463
  if "requirements.txt" not in files:
464
- if language == "gradio":
465
- (temp_path / "requirements.txt").write_text("gradio>=4.0.0\n", encoding='utf-8')
466
- elif language == "streamlit":
467
- (temp_path / "requirements.txt").write_text("streamlit>=1.30.0\n", encoding='utf-8')
 
 
 
 
 
 
 
 
 
 
468
 
469
  # Create Dockerfile if needed
470
  if sdk == "docker":
@@ -505,8 +744,18 @@ def deploy_to_huggingface_space(
505
  file_path.parent.mkdir(parents=True, exist_ok=True)
506
  file_path.write_text(content, encoding='utf-8')
507
 
 
508
  if "requirements.txt" not in files:
509
- (temp_path / "requirements.txt").write_text("gradio>=4.0.0\n", encoding='utf-8')
 
 
 
 
 
 
 
 
 
510
 
511
  # Don't create README - HuggingFace will auto-generate it
512
  # We'll add the anycoder tag after deployment
 
8
  import uuid
9
  import tempfile
10
  import shutil
11
+ import ast
12
  from typing import Dict, List, Optional, Tuple
13
  from pathlib import Path
14
 
15
  from huggingface_hub import HfApi
16
+ from backend_models import get_inference_client, get_real_model_id
17
 
18
 
19
  def parse_html_code(code: str) -> str:
 
140
  return None
141
 
142
 
143
+ def strip_tool_call_markers(text):
144
+ """Remove TOOL_CALL markers and thinking tags that some LLMs add to their output."""
145
+ if not text:
146
+ return text
147
+ # Remove [TOOL_CALL] and [/TOOL_CALL] markers
148
+ text = re.sub(r'\[/?TOOL_CALL\]', '', text, flags=re.IGNORECASE)
149
+ # Remove <think> and </think> tags and their content
150
+ text = re.sub(r'<think>[\s\S]*?</think>', '', text, flags=re.IGNORECASE)
151
+ # Remove any remaining unclosed <think> tags at the start
152
+ text = re.sub(r'^<think>[\s\S]*?(?=\n|$)', '', text, flags=re.IGNORECASE | re.MULTILINE)
153
+ # Remove any remaining </think> tags
154
+ text = re.sub(r'</think>', '', text, flags=re.IGNORECASE)
155
+ # Remove standalone }} that appears with tool calls
156
+ # Only remove if it's on its own line or at the end
157
+ text = re.sub(r'^\s*\}\}\s*$', '', text, flags=re.MULTILINE)
158
+ return text.strip()
159
+
160
+
161
+ def remove_code_block(text):
162
+ """Remove code block markers from text."""
163
+ # First strip any tool call markers
164
+ text = strip_tool_call_markers(text)
165
+
166
+ # Try to match code blocks with language markers
167
+ patterns = [
168
+ r'```(?:html|HTML)\n([\s\S]+?)\n```', # Match ```html or ```HTML
169
+ r'```\n([\s\S]+?)\n```', # Match code blocks without language markers
170
+ r'```([\s\S]+?)```' # Match code blocks without line breaks
171
+ ]
172
+ for pattern in patterns:
173
+ match = re.search(pattern, text, re.DOTALL)
174
+ if match:
175
+ extracted = match.group(1).strip()
176
+ # Remove a leading language marker line (e.g., 'python') if present
177
+ if extracted.split('\n', 1)[0].strip().lower() in ['python', 'html', 'css', 'javascript', 'json', 'c', 'cpp', 'markdown', 'latex', 'jinja2', 'typescript', 'yaml', 'dockerfile', 'shell', 'r', 'sql']:
178
+ return extracted.split('\n', 1)[1] if '\n' in extracted else ''
179
+ return extracted
180
+ # If no code block is found, return as-is
181
+ return text.strip()
182
+
183
+
184
+ def extract_import_statements(code):
185
+ """Extract import statements from generated code."""
186
+ import_statements = []
187
+
188
+ # Built-in Python modules to exclude
189
+ builtin_modules = {
190
+ 'os', 'sys', 'json', 'time', 'datetime', 'random', 'math', 're', 'collections',
191
+ 'itertools', 'functools', 'pathlib', 'urllib', 'http', 'email', 'html', 'xml',
192
+ 'csv', 'tempfile', 'shutil', 'subprocess', 'threading', 'multiprocessing',
193
+ 'asyncio', 'logging', 'typing', 'base64', 'hashlib', 'secrets', 'uuid',
194
+ 'copy', 'pickle', 'io', 'contextlib', 'warnings', 'sqlite3', 'gzip', 'zipfile',
195
+ 'tarfile', 'socket', 'ssl', 'platform', 'getpass', 'pwd', 'grp', 'stat',
196
+ 'glob', 'fnmatch', 'linecache', 'traceback', 'inspect', 'keyword', 'token',
197
+ 'tokenize', 'ast', 'code', 'codeop', 'dis', 'py_compile', 'compileall',
198
+ 'importlib', 'pkgutil', 'modulefinder', 'runpy', 'site', 'sysconfig'
199
+ }
200
+
201
+ try:
202
+ # Try to parse as Python AST
203
+ tree = ast.parse(code)
204
+
205
+ for node in ast.walk(tree):
206
+ if isinstance(node, ast.Import):
207
+ for alias in node.names:
208
+ module_name = alias.name.split('.')[0]
209
+ if module_name not in builtin_modules and not module_name.startswith('_'):
210
+ import_statements.append(f"import {alias.name}")
211
+
212
+ elif isinstance(node, ast.ImportFrom):
213
+ if node.module:
214
+ module_name = node.module.split('.')[0]
215
+ if module_name not in builtin_modules and not module_name.startswith('_'):
216
+ names = [alias.name for alias in node.names]
217
+ import_statements.append(f"from {node.module} import {', '.join(names)}")
218
+
219
+ except SyntaxError:
220
+ # Fallback: use regex to find import statements
221
+ for line in code.split('\n'):
222
+ line = line.strip()
223
+ if line.startswith('import ') or line.startswith('from '):
224
+ # Check if it's not a builtin module
225
+ if line.startswith('import '):
226
+ module_name = line.split()[1].split('.')[0]
227
+ elif line.startswith('from '):
228
+ module_name = line.split()[1].split('.')[0]
229
+
230
+ if module_name not in builtin_modules and not module_name.startswith('_'):
231
+ import_statements.append(line)
232
+
233
+ return list(set(import_statements)) # Remove duplicates
234
+
235
+
236
+ def generate_requirements_txt_with_llm(import_statements):
237
+ """Generate requirements.txt content using LLM based on import statements."""
238
+ if not import_statements:
239
+ return "# No additional dependencies required\n"
240
+
241
+ # Use a lightweight model for this task
242
+ try:
243
+ client = get_inference_client("zai-org/GLM-4.6", "auto")
244
+ actual_model_id = get_real_model_id("zai-org/GLM-4.6")
245
+
246
+ imports_text = '\n'.join(import_statements)
247
+
248
+ prompt = f"""Based on the following Python import statements, generate a comprehensive requirements.txt file with all necessary and commonly used related packages:
249
+
250
+ {imports_text}
251
+
252
+ Instructions:
253
+ - Include the direct packages needed for the imports
254
+ - Include commonly used companion packages and dependencies for better functionality
255
+ - Use correct PyPI package names (e.g., PIL -> Pillow, sklearn -> scikit-learn)
256
+ - IMPORTANT: For diffusers, ALWAYS use: git+https://github.com/huggingface/diffusers
257
+ - IMPORTANT: For transformers, ALWAYS use: git+https://github.com/huggingface/transformers
258
+ - IMPORTANT: If diffusers is installed, also include transformers and sentencepiece as they usually go together
259
+ - Examples of comprehensive dependencies:
260
+ * diffusers often needs: git+https://github.com/huggingface/transformers, sentencepiece, accelerate, torch, tokenizers
261
+ * transformers often needs: accelerate, torch, tokenizers, datasets
262
+ * gradio often needs: requests, Pillow for image handling
263
+ * pandas often needs: numpy, openpyxl for Excel files
264
+ * matplotlib often needs: numpy, pillow for image saving
265
+ * sklearn often needs: numpy, scipy, joblib
266
+ * streamlit often needs: pandas, numpy, requests
267
+ * opencv-python often needs: numpy, pillow
268
+ * fastapi often needs: uvicorn, pydantic
269
+ * torch often needs: torchvision, torchaudio (if doing computer vision/audio)
270
+ - Include packages for common file formats if relevant (openpyxl, python-docx, PyPDF2)
271
+ - Do not include Python built-in modules
272
+ - Do not specify versions unless there are known compatibility issues
273
+ - One package per line
274
+ - If no external packages are needed, return "# No additional dependencies required"
275
+
276
+ 🚨 CRITICAL OUTPUT FORMAT:
277
+ - Output ONLY the package names, one per line (plain text format)
278
+ - Do NOT use markdown formatting (no ```, no bold, no headings, no lists)
279
+ - Do NOT add any explanatory text before or after the package list
280
+ - Do NOT wrap the output in code blocks
281
+ - Just output raw package names as they would appear in requirements.txt
282
+
283
+ Generate a comprehensive requirements.txt that ensures the application will work smoothly:"""
284
+
285
+ messages = [
286
+ {"role": "system", "content": "You are a Python packaging expert specializing in creating comprehensive, production-ready requirements.txt files. Output ONLY plain text package names without any markdown formatting, code blocks, or explanatory text. Your goal is to ensure applications work smoothly by including not just direct dependencies but also commonly needed companion packages, popular extensions, and supporting libraries that developers typically need together."},
287
+ {"role": "user", "content": prompt}
288
+ ]
289
+
290
+ response = client.chat.completions.create(
291
+ model=actual_model_id,
292
+ messages=messages,
293
+ max_tokens=1024,
294
+ temperature=0.1
295
+ )
296
+
297
+ requirements_content = response.choices[0].message.content.strip()
298
+
299
+ # Clean up the response in case it includes extra formatting
300
+ if '```' in requirements_content:
301
+ requirements_content = remove_code_block(requirements_content)
302
+
303
+ # Enhanced cleanup for markdown and formatting
304
+ lines = requirements_content.split('\n')
305
+ clean_lines = []
306
+ for line in lines:
307
+ stripped_line = line.strip()
308
+
309
+ # Skip lines that are markdown formatting
310
+ if (stripped_line == '```' or
311
+ stripped_line.startswith('```') or
312
+ stripped_line.startswith('#') and not stripped_line.startswith('# ') or # Skip markdown headers but keep comments
313
+ stripped_line.startswith('**') or # Skip bold text
314
+ stripped_line.startswith('*') and not stripped_line[1:2].isalnum() or # Skip markdown lists but keep package names starting with *
315
+ stripped_line.startswith('-') and not stripped_line[1:2].isalnum() or # Skip markdown lists but keep package names starting with -
316
+ stripped_line.startswith('===') or # Skip section dividers
317
+ stripped_line.startswith('---') or # Skip horizontal rules
318
+ stripped_line.lower().startswith('here') or # Skip explanatory text
319
+ stripped_line.lower().startswith('this') or # Skip explanatory text
320
+ stripped_line.lower().startswith('the') or # Skip explanatory text
321
+ stripped_line.lower().startswith('based on') or # Skip explanatory text
322
+ stripped_line == ''): # Skip empty lines unless they're at natural boundaries
323
+ continue
324
+
325
+ # Keep lines that look like valid package specifications
326
+ # Valid lines: package names, git+https://, comments starting with "# "
327
+ if (stripped_line.startswith('# ') or # Valid comments
328
+ stripped_line.startswith('git+') or # Git dependencies
329
+ stripped_line[0].isalnum() or # Package names start with alphanumeric
330
+ '==' in stripped_line or # Version specifications
331
+ '>=' in stripped_line or # Version specifications
332
+ '<=' in stripped_line): # Version specifications
333
+ clean_lines.append(line)
334
+
335
+ requirements_content = '\n'.join(clean_lines).strip()
336
+
337
+ # Ensure it ends with a newline
338
+ if requirements_content and not requirements_content.endswith('\n'):
339
+ requirements_content += '\n'
340
+
341
+ return requirements_content if requirements_content else "# No additional dependencies required\n"
342
+
343
+ except Exception as e:
344
+ # Fallback: simple extraction with basic mapping
345
+ print(f"[Deploy] Warning: LLM requirements generation failed: {e}, using fallback")
346
+ dependencies = set()
347
+ special_cases = {
348
+ 'PIL': 'Pillow',
349
+ 'sklearn': 'scikit-learn',
350
+ 'skimage': 'scikit-image',
351
+ 'bs4': 'beautifulsoup4'
352
+ }
353
+
354
+ for stmt in import_statements:
355
+ if stmt.startswith('import '):
356
+ module_name = stmt.split()[1].split('.')[0]
357
+ package_name = special_cases.get(module_name, module_name)
358
+ dependencies.add(package_name)
359
+ elif stmt.startswith('from '):
360
+ module_name = stmt.split()[1].split('.')[0]
361
+ package_name = special_cases.get(module_name, module_name)
362
+ dependencies.add(package_name)
363
+
364
+ if dependencies:
365
+ return '\n'.join(sorted(dependencies)) + '\n'
366
+ else:
367
+ return "# No additional dependencies required\n"
368
+
369
+
370
  def parse_multi_file_python_output(code: str) -> Dict[str, str]:
371
  """Parse multi-file Python output (e.g., Gradio, Streamlit)"""
372
  files = {}
 
688
  file_path.parent.mkdir(parents=True, exist_ok=True)
689
  file_path.write_text(content, encoding='utf-8')
690
 
691
+ # Ensure requirements.txt exists - generate from imports if missing
692
  if "requirements.txt" not in files:
693
+ # Get the main app file (app.py for gradio, streamlit_app.py or app.py for streamlit)
694
+ main_app = files.get('streamlit_app.py') or files.get('app.py', '')
695
+ if main_app:
696
+ print(f"[Deploy] Generating requirements.txt from imports in {language} app")
697
+ import_statements = extract_import_statements(main_app)
698
+ requirements_content = generate_requirements_txt_with_llm(import_statements)
699
+ (temp_path / "requirements.txt").write_text(requirements_content, encoding='utf-8')
700
+ print(f"[Deploy] Generated requirements.txt with {len(requirements_content.splitlines())} lines")
701
+ else:
702
+ # Fallback to minimal requirements if no app file found
703
+ if language == "gradio":
704
+ (temp_path / "requirements.txt").write_text("gradio>=4.0.0\n", encoding='utf-8')
705
+ elif language == "streamlit":
706
+ (temp_path / "requirements.txt").write_text("streamlit>=1.30.0\n", encoding='utf-8')
707
 
708
  # Create Dockerfile if needed
709
  if sdk == "docker":
 
744
  file_path.parent.mkdir(parents=True, exist_ok=True)
745
  file_path.write_text(content, encoding='utf-8')
746
 
747
+ # Generate requirements.txt from imports if missing
748
  if "requirements.txt" not in files:
749
+ main_app = files.get('app.py', '')
750
+ if main_app:
751
+ print(f"[Deploy] Generating requirements.txt from imports in default app")
752
+ import_statements = extract_import_statements(main_app)
753
+ requirements_content = generate_requirements_txt_with_llm(import_statements)
754
+ (temp_path / "requirements.txt").write_text(requirements_content, encoding='utf-8')
755
+ print(f"[Deploy] Generated requirements.txt with {len(requirements_content.splitlines())} lines")
756
+ else:
757
+ # Fallback to minimal requirements if no app file found
758
+ (temp_path / "requirements.txt").write_text("gradio>=4.0.0\n", encoding='utf-8')
759
 
760
  # Don't create README - HuggingFace will auto-generate it
761
  # We'll add the anycoder tag after deployment