Update utils.py
Browse files
utils.py
CHANGED
|
@@ -3,8 +3,9 @@ import os
|
|
| 3 |
import tempfile
|
| 4 |
import requests
|
| 5 |
import json
|
|
|
|
| 6 |
from pathlib import Path
|
| 7 |
-
from typing import Optional
|
| 8 |
|
| 9 |
from langchain_openai import ChatOpenAI
|
| 10 |
from langchain_deepseek import ChatDeepSeek
|
|
@@ -22,7 +23,7 @@ DEBUG_MODE = config['DEBUG_MODE']
|
|
| 22 |
|
| 23 |
def check_api_keys():
|
| 24 |
"""Check for the presence of required API keys."""
|
| 25 |
-
required_keys = ['OPENAI_API_KEY', 'DEEPSEEK_API_KEY', 'TAVILY_API_KEY']
|
| 26 |
missing_keys = [key for key in required_keys if not os.environ.get(key)]
|
| 27 |
|
| 28 |
if missing_keys:
|
|
@@ -122,9 +123,98 @@ def download_and_save_task_file(task_id: str, original_filename: str) -> Optiona
|
|
| 122 |
def cleanup_temp_files(temp_file_path) -> None:
|
| 123 |
""" Clean up temporary files created during processing. """
|
| 124 |
try:
|
| 125 |
-
|
|
|
|
|
|
|
|
|
|
| 126 |
os.remove(temp_file_path)
|
| 127 |
print(f"Cleaned up temporary file: {temp_file_path}")
|
|
|
|
|
|
|
|
|
|
| 128 |
except Exception as e:
|
| 129 |
print(f"Error cleaning up temp file {temp_file_path}: {str(e)}")
|
| 130 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
import tempfile
|
| 4 |
import requests
|
| 5 |
import json
|
| 6 |
+
import re
|
| 7 |
from pathlib import Path
|
| 8 |
+
from typing import Optional, Tuple
|
| 9 |
|
| 10 |
from langchain_openai import ChatOpenAI
|
| 11 |
from langchain_deepseek import ChatDeepSeek
|
|
|
|
| 23 |
|
| 24 |
def check_api_keys():
|
| 25 |
"""Check for the presence of required API keys."""
|
| 26 |
+
required_keys = ['OPENAI_API_KEY', 'DEEPSEEK_API_KEY', 'TAVILY_API_KEY', 'ANTHROPIC_API_KEY', 'GEMINI_API_KEY']
|
| 27 |
missing_keys = [key for key in required_keys if not os.environ.get(key)]
|
| 28 |
|
| 29 |
if missing_keys:
|
|
|
|
| 123 |
def cleanup_temp_files(temp_file_path) -> None:
    """Delete a temporary file, but only if it lives inside the system temp directory.

    Args:
        temp_file_path: Path of the file to delete, as a ``str`` or any
            ``os.PathLike`` (e.g. ``pathlib.Path``). Empty values and values
            of any other type (such as ``None``) are ignored.

    Returns:
        None. This is a best-effort helper: it never raises. Failures are
        reported on stdout instead.
    """
    if not temp_file_path or not isinstance(temp_file_path, (str, os.PathLike)):
        return

    try:
        # Compare fully resolved paths rather than raw string prefixes: a plain
        # startswith() check would accept sibling directories ("/tmp_other/x")
        # and ".." traversal ("/tmp/../etc/x"), and would reject valid paths
        # when the temp directory is reached through a symlink.
        temp_root = Path(os.path.realpath(tempfile.gettempdir()))
        resolved_target = Path(os.path.realpath(temp_file_path))
        try:
            resolved_target.relative_to(temp_root)
        except ValueError:
            print(f"Skipping cleanup of {temp_file_path}: not inside temp directory {temp_root}")
            return

        # A file that is already gone is not an error.
        if not os.path.exists(temp_file_path):
            return

        # Remove the path as given, so a symlink is removed rather than its target.
        os.remove(temp_file_path)
        print(f"Cleaned up temporary file: {temp_file_path}")
    except Exception as e:
        print(f"Error cleaning up temp file {temp_file_path}: {str(e)}")
|
| 137 |
|
| 138 |
+
# Preferred extensions for the MIME types this agent most often receives.
_CONTENT_TYPE_EXTENSIONS = {
    'image/jpeg': '.jpg',
    'image/png': '.png',
    'application/pdf': '.pdf',
    'text/plain': '.txt',
    'application/json': '.json',
    'text/csv': '.csv',
}


def _filename_from_content_disposition(content_disposition: str) -> str:
    """Return the raw ``filename`` parameter of a Content-Disposition header, or ""."""
    # A quoted value runs to the closing quote; an unquoted value stops at the
    # next ';' so trailing parameters (e.g. "; size=10") are not swallowed.
    match = re.search(
        r'filename\s*=\s*(?:"([^"]*)"|([^;]+))',
        content_disposition,
        flags=re.IGNORECASE,
    )
    if not match:
        return ""
    quoted, unquoted = match.group(1), match.group(2)
    return (quoted if quoted is not None else unquoted).strip()


def _sanitize_filename(raw_name: str) -> str:
    """Replace every character other than alphanumerics, '.', '_' and '-' with '_'."""
    return "".join(c if c.isalnum() or c in "._-" else "_" for c in raw_name)


def _extension_for_content_type(content_type: str) -> str:
    """Return a file extension (with leading dot) for a MIME type, or "" if unknown."""
    import mimetypes  # local import: only this helper needs it

    return _CONTENT_TYPE_EXTENSIONS.get(content_type) or mimetypes.guess_extension(content_type) or ""


def process_file_for_task_v2(task_id: str, question_text: str, api_url: str) -> Tuple[str, Optional[Path]]:
    """Download the file attached to a task and append its local path to the question.

    The file is fetched from ``{api_url}/files/{task_id}`` and stored in the
    ``hf_space_agent_files`` directory under the system temp directory. The
    filename comes from the Content-Disposition header (sanitized), falling
    back to ``task_id``. If the name has no extension, one is derived from
    the Content-Type header, with ``.dat`` as the last resort.

    Args:
        task_id: Identifier of the task whose file should be fetched.
        question_text: The original question text.
        api_url: Base URL of the API serving the ``/files/{task_id}`` endpoint.

    Returns:
        ``(question_text, path)``. On success ``question_text`` is extended
        with a "Technical Information" section naming the saved file, and
        ``path`` is its location. If there is no file (404), the download
        fails, or the file cannot be saved, the unmodified ``question_text``
        and ``None`` are returned.
    """
    file_download_url = f"{api_url}/files/{task_id}"
    print(f"Attempting to download file for task {task_id} from {file_download_url}")

    try:
        response = requests.get(file_download_url, timeout=30)
        if response.status_code == 404:
            print(f"No file found for task {task_id} (404). Proceeding without file.")
            return question_text, None
        response.raise_for_status()  # Any other 4xx/5xx is treated as a download error
    except requests.exceptions.RequestException as exc:
        print(f"Error downloading file for task {task_id}: {exc}. Proceeding without file.")
        return question_text, None

    # Determine the filename from the Content-Disposition header, if present.
    raw_name = _filename_from_content_disposition(response.headers.get("content-disposition", ""))
    if raw_name:
        filename = _sanitize_filename(raw_name)
    else:
        print(f"Could not determine filename from Content-Disposition for task {task_id}. Using task_id as filename base.")
        # The fallback is sanitized too, so it can never carry path separators.
        filename = _sanitize_filename(task_id)

    # Give the file an extension when the name does not already have one.
    if not Path(filename).suffix:
        # Drop parameters such as "; charset=utf-8"; MIME types are case-insensitive.
        content_type = response.headers.get('Content-Type', '').split(';')[0].strip().lower()
        extension = _extension_for_content_type(content_type)
        if not extension:
            print(f"Warning: Could not determine extension for task {task_id} from Content-Type '{content_type}'. Using '.dat'.")
            extension = '.dat'  # Generic data extension if type is unknown or unmapped
        filename += extension

    temp_storage_dir = Path(tempfile.gettempdir()) / "hf_space_agent_files"
    # Path(...).name keeps only the final component, as a last guard against
    # writing outside temp_storage_dir.
    local_file_path = temp_storage_dir / Path(filename).name

    try:
        # Directory creation is inside the try so that a failure here also
        # results in "proceed without file" instead of an uncaught exception.
        temp_storage_dir.mkdir(parents=True, exist_ok=True)
        with open(local_file_path, 'wb') as f:
            f.write(response.content)
    except OSError as e:
        print(f"Error saving file {local_file_path} for task {task_id}: {e}")
        return question_text, None  # Saving failed

    print(f"File for task {task_id} saved to: {local_file_path}")
    amended_question = (
        f"{question_text}\n\n"
        f"--- Technical Information ---\n"
        f"A file relevant to this task was downloaded and is available to your tools at the following local path. "
        f"Your tools that can read local files (like read_file, extract_text_from_image, etc.) should use this path:\n"
        f"Local file path: {str(local_file_path)}\n"
        f"--- End Technical Information ---\n\n"
    )
    return amended_question, local_file_path
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
|
| 219 |
+
|
| 220 |
+
|