michaelarutyunov committed on
Commit
515e162
·
verified ·
1 Parent(s): bc59e50

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +93 -3
utils.py CHANGED
@@ -3,8 +3,9 @@ import os
3
  import tempfile
4
  import requests
5
  import json
 
6
  from pathlib import Path
7
- from typing import Optional
8
 
9
  from langchain_openai import ChatOpenAI
10
  from langchain_deepseek import ChatDeepSeek
@@ -22,7 +23,7 @@ DEBUG_MODE = config['DEBUG_MODE']
22
 
23
  def check_api_keys():
24
  """Check for the presence of required API keys."""
25
- required_keys = ['OPENAI_API_KEY', 'DEEPSEEK_API_KEY', 'TAVILY_API_KEY']
26
  missing_keys = [key for key in required_keys if not os.environ.get(key)]
27
 
28
  if missing_keys:
@@ -122,9 +123,98 @@ def download_and_save_task_file(task_id: str, original_filename: str) -> Optiona
def cleanup_temp_files(temp_file_path) -> None:
    """Delete a temporary file, but only if it sits inside the system temp directory.

    Args:
        temp_file_path: Absolute location of the file to remove, given as a
            ``str`` or ``pathlib.Path``. Any other type (including ``None``),
            an empty string, or a relative path is ignored.

    Returns:
        None. Removal failures are reported with ``print`` and not raised,
        so cleanup stays best-effort.
    """
    if not isinstance(temp_file_path, (str, Path)):
        return
    try:
        # Relative paths can never be "inside" the absolute temp directory,
        # and resolving them against the cwd would make the result depend on
        # where the process happens to run.
        if not os.path.isabs(temp_file_path):
            return
        # Normalise lexically so ".." segments cannot climb out of the temp
        # directory, then compare whole path components: a plain string
        # prefix test would also accept siblings such as "<tmp>_other/file".
        candidate = Path(os.path.normpath(temp_file_path))
        temp_root = Path(os.path.normpath(tempfile.gettempdir()))
        if temp_root not in candidate.parents:
            return
        if not os.path.exists(candidate):
            return
        os.remove(candidate)
        print(f"Cleaned up temporary file: {temp_file_path}")
    except (OSError, ValueError) as e:
        print(f"Error cleaning up temp file {temp_file_path}: {str(e)}")
130
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  import tempfile
4
  import requests
5
  import json
6
+ import re
7
  from pathlib import Path
8
+ from typing import Optional, Tuple
9
 
10
  from langchain_openai import ChatOpenAI
11
  from langchain_deepseek import ChatDeepSeek
 
23
 
24
  def check_api_keys():
25
  """Check for the presence of required API keys."""
26
+ required_keys = ['OPENAI_API_KEY', 'DEEPSEEK_API_KEY', 'TAVILY_API_KEY', 'ANTHROPIC_API_KEY', 'GEMINI_API_KEY']
27
  missing_keys = [key for key in required_keys if not os.environ.get(key)]
28
 
29
  if missing_keys:
 
def cleanup_temp_files(temp_file_path) -> None:
    """Delete a temporary file, but only if it sits inside the system temp directory.

    Args:
        temp_file_path: Absolute location of the file to remove, given as a
            ``str`` or ``pathlib.Path``. Any other type (including ``None``),
            an empty string, or a relative path is ignored.

    Returns:
        None. Removal failures are reported with ``print`` and not raised,
        so cleanup stays best-effort.
    """
    if not isinstance(temp_file_path, (str, Path)):
        return
    try:
        # Relative paths can never be "inside" the absolute temp directory,
        # and resolving them against the cwd would make the result depend on
        # where the process happens to run.
        if not os.path.isabs(temp_file_path):
            return
        # Normalise lexically so ".." segments cannot climb out of the temp
        # directory, then compare whole path components: a plain string
        # prefix test would also accept siblings such as "<tmp>_other/file".
        candidate = Path(os.path.normpath(temp_file_path))
        temp_root = Path(os.path.normpath(tempfile.gettempdir()))
        if temp_root not in candidate.parents:
            return
        if not os.path.exists(candidate):
            return
        os.remove(candidate)
        print(f"Cleaned up temporary file: {temp_file_path}")
    except (OSError, ValueError) as e:
        print(f"Error cleaning up temp file {temp_file_path}: {str(e)}")
137
 
# MIME type -> file extension used when the server-supplied name has no suffix.
_CONTENT_TYPE_EXTENSIONS = {
    'image/jpeg': '.jpg',
    'image/png': '.png',
    'application/pdf': '.pdf',
    'text/plain': '.txt',
    'application/json': '.json',
    'text/csv': '.csv',
}

# Matches the plain ``filename=`` parameter of a Content-Disposition header.
# A quoted value ends at the closing quote; an unquoted value ends at the next
# ';' so that trailing parameters are not swallowed into the name.
_FILENAME_PARAM = re.compile(r'filename\s*=\s*(?:"([^"]*)"|([^;]+))', re.IGNORECASE)


def _pick_download_filename(headers, task_id: str) -> str:
    """Choose a safe local filename (with extension) for a downloaded task file.

    Args:
        headers: Mapping of response headers; read with ``.get`` for
            ``content-disposition`` and ``Content-Type``.
            NOTE(review): the differing key casing relies on the mapping being
            case-insensitive, as ``requests`` response headers are documented
            to be - confirm if another mapping is ever passed in.
        task_id: Used as the filename base when the header gives no usable name.

    Returns:
        A filename containing only alphanumerics, '.', '_' and '-', always
        with some extension ('.dat' when the content type is not recognised).
    """
    match = _FILENAME_PARAM.search(headers.get("content-disposition", ""))
    raw_name = ""
    if match:
        raw_name = (match.group(1) or match.group(2) or "").strip()

    if raw_name:
        # Replace anything that is not alphanumeric or one of . _ - with '_'.
        filename = "".join(c if c.isalnum() or c in "._-" else "_" for c in raw_name)
        # Names made only of dots/underscores (e.g. a sanitized "../..")
        # carry no information, so treat them as empty.
        if not filename.strip("._"):
            print(f"Warning: Sanitized filename from header for task {task_id} is empty. Using task_id as filename base.")
            filename = task_id
    else:
        print(f"Could not determine filename from Content-Disposition for task {task_id}. Using task_id as filename base.")
        filename = task_id

    if not Path(filename).suffix:
        # Keep only the MIME type, dropping parameters such as "; charset=...".
        content_type = headers.get('Content-Type', '').split(';')[0].strip()
        # MIME types are case-insensitive, so look them up lowercased.
        extension = _CONTENT_TYPE_EXTENSIONS.get(content_type.lower())
        if extension is None:
            print(f"Warning: Could not determine extension for task {task_id} from Content-Type '{content_type}'. Using '.dat'.")
            extension = '.dat'  # Generic data extension if type is unknown or unmapped
        filename += extension

    return filename


def process_file_for_task_v2(task_id: str, question_text: str, api_url: str) -> Tuple[str, Optional[Path]]:
    """Download the file attached to a task and append its local path to the question.

    The file is fetched from ``{api_url}/files/{task_id}`` and written to the
    ``hf_space_agent_files`` folder under the system temp directory.

    Args:
        task_id: Identifier of the task; also the fallback filename base.
        question_text: Original question text.
        api_url: Base URL of the API serving the task files.

    Returns:
        ``(question, path)``: on success the question with a "Technical
        Information" section naming the saved file, plus the file's ``Path``.
        On a 404, any other request error, or a failure to create the folder
        or write the file, the unmodified ``question_text`` and ``None``.
    """
    file_download_url = f"{api_url}/files/{task_id}"
    print(f"Attempting to download file for task {task_id} from {file_download_url}")

    try:
        response = requests.get(file_download_url, timeout=30)
        if response.status_code == 404:
            print(f"No file found for task {task_id} (404). Proceeding without file.")
            return question_text, None
        response.raise_for_status()  # Raise an exception for other bad status codes (4xx, 5xx)
    except requests.exceptions.RequestException as exc:
        print(f"Error downloading file for task {task_id}: {exc}. Proceeding without file.")
        return question_text, None

    filename = _pick_download_filename(response.headers, task_id)

    temp_storage_dir = Path(tempfile.gettempdir()) / "hf_space_agent_files"
    # Path(filename).name keeps only the final component, so the file always
    # lands directly inside temp_storage_dir.
    local_file_path = temp_storage_dir / Path(filename).name

    try:
        # Directory creation is inside the try so that a failure here also
        # falls back to "no file" instead of propagating to the caller.
        temp_storage_dir.mkdir(parents=True, exist_ok=True)
        with open(local_file_path, 'wb') as f:
            f.write(response.content)
    except OSError as e:
        print(f"Error saving file {local_file_path} for task {task_id}: {e}")
        return question_text, None  # Saving failed

    print(f"File for task {task_id} saved to: {local_file_path}")
    amended_question = (
        f"{question_text}\n\n"
        f"--- Technical Information ---\n"
        f"A file relevant to this task was downloaded and is available to your tools at the following local path. "
        f"Your tools that can read local files (like read_file, extract_text_from_image, etc.) should use this path:\n"
        f"Local file path: {str(local_file_path)}\n"
        f"--- End Technical Information ---\n\n"
    )
    return amended_question, local_file_path
216
+
217
+
218
+
219
+
220
+