Spaces:

michaelarutyunov
/

HuggingFace_Agent-Course_final-assignment

Runtime error

App Files Files Community

michaelarutyunov committed on May 8, 2025

Commit

515e162

verified ·

1 Parent(s): bc59e50

Update utils.py

Browse files

Files changed (1) hide show

utils.py +93 -3

utils.py CHANGED Viewed

@@ -3,8 +3,9 @@ import os
 import tempfile
 import requests
 import json
 from pathlib import Path
-from typing import Optional
 from langchain_openai import ChatOpenAI
 from langchain_deepseek import ChatDeepSeek
@@ -22,7 +23,7 @@ DEBUG_MODE = config['DEBUG_MODE']
 def check_api_keys():
     """Check for the presence of required API keys."""
-    required_keys = ['OPENAI_API_KEY', 'DEEPSEEK_API_KEY', 'TAVILY_API_KEY']
     missing_keys = [key for key in required_keys if not os.environ.get(key)]
     if missing_keys:
@@ -122,9 +123,98 @@ def download_and_save_task_file(task_id: str, original_filename: str) -> Optiona
 def cleanup_temp_files(temp_file_path) -> None:
     """ Clean up temporary files created during processing. """
     try:
-        if temp_file_path.startswith(tempfile.gettempdir()) and os.path.exists(temp_file_path):
             os.remove(temp_file_path)
             print(f"Cleaned up temporary file: {temp_file_path}")
     except Exception as e:
         print(f"Error cleaning up temp file {temp_file_path}: {str(e)}")

 import tempfile
 import requests
 import json
+import re
 from pathlib import Path
+from typing import Optional, Tuple
 from langchain_openai import ChatOpenAI
 from langchain_deepseek import ChatDeepSeek
 def check_api_keys():
     """Check for the presence of required API keys."""
+    required_keys = ['OPENAI_API_KEY', 'DEEPSEEK_API_KEY', 'TAVILY_API_KEY', 'ANTHROPIC_API_KEY', 'GEMINI_API_KEY']
     missing_keys = [key for key in required_keys if not os.environ.get(key)]
     if missing_keys:
 def cleanup_temp_files(temp_file_path) -> None:
     """ Clean up temporary files created during processing. """
     try:
+        # To be safer, ensure temp_file_path is indeed a Path object if Path.unlink() is to be used.
+        # Or, if it's a string, os.remove(temp_file_path) is fine.
+        # Assuming os.path.exists and os.remove for string paths as per original.
+        if isinstance(temp_file_path, str) and temp_file_path.startswith(tempfile.gettempdir()) and os.path.exists(temp_file_path):
             os.remove(temp_file_path)
             print(f"Cleaned up temporary file: {temp_file_path}")
+        elif isinstance(temp_file_path, Path) and str(temp_file_path).startswith(tempfile.gettempdir()) and temp_file_path.exists():
+            temp_file_path.unlink()
+            print(f"Cleaned up temporary file: {temp_file_path}")
     except Exception as e:
         print(f"Error cleaning up temp file {temp_file_path}: {str(e)}")
# MIME types recognised when the server-supplied filename has no extension,
# mapped to the extension that gets appended.
_MIME_TYPE_EXTENSIONS = {
    'image/jpeg': '.jpg',
    'image/png': '.png',
    'application/pdf': '.pdf',
    'text/plain': '.txt',
    'application/json': '.json',
    'text/csv': '.csv',
    # Add more mime-type to extension mappings as needed
}


def _sanitize_filename(raw_name: str) -> str:
    """Replace every character other than alphanumerics, '.', '_' and '-' with '_'."""
    return "".join(c if c.isalnum() or c in ('.', '_', '-') else '_' for c in raw_name)


def _choose_download_filename(headers, task_id: str) -> str:
    """Derive a safe local filename from the response headers, falling back to task_id.

    Args:
        headers: Mapping of response headers; only ``.get`` is used, with the
            keys "content-disposition" and "Content-Type".
        task_id: Used as the filename base when the header yields nothing usable.

    Returns:
        A sanitised filename that always carries an extension.
    """
    # Local import: the surrounding module does not import the email package.
    from email.message import Message

    content_disposition = headers.get("content-disposition", "")
    filename_from_header = ""
    if content_disposition:
        # The stdlib header parser copes with quoted and unquoted values,
        # trailing parameters ("filename=a.txt; size=3") and RFC 2231
        # "filename*=" encodings, which a simple regex does not.
        parsed = Message()
        parsed["Content-Disposition"] = content_disposition
        filename_from_header = parsed.get_filename() or ""

    fallback_name = _sanitize_filename(task_id) or "task_file"
    if filename_from_header:
        filename = _sanitize_filename(filename_from_header)
        # Names made only of dots ("." / "..") are directory references, not files.
        if not filename.strip('.'):
            print(f"Warning: Sanitized filename from header for task {task_id} is empty or only dots. Using task_id as filename base.")
            filename = fallback_name
    else:
        print(f"Could not determine filename from Content-Disposition for task {task_id}. Using task_id as filename base.")
        filename = fallback_name

    # Ensure a reasonable default extension if none is apparent.
    if not Path(filename).suffix:
        # Keep only the MIME type part; media types are case-insensitive.
        content_type = headers.get('Content-Type', '').split(';')[0].strip().lower()
        extension = _MIME_TYPE_EXTENSIONS.get(content_type)
        if extension is None:
            print(f"Warning: Could not determine extension for task {task_id} from Content-Type '{content_type}'. Using '.dat'.")
            extension = '.dat'  # Generic data extension if type is unknown or unmapped
        filename += extension
    return filename


def process_file_for_task_v2(task_id: str, question_text: str, api_url: str) -> Tuple[str, Optional[Path]]:
    """Download the file attached to a task and append its local path to the question.

    The file is requested from ``{api_url}/files/{task_id}`` and stored under
    ``<system temp dir>/hf_space_agent_files``. Every failure (no file, HTTP
    or network error, problem writing to disk) is printed and reported by
    returning the unmodified question together with ``None``.

    Args:
        task_id: Identifier of the task whose file should be fetched.
        question_text: Original question text.
        api_url: Base URL of the API serving the task files.

    Returns:
        ``(question, path)``. On success ``question`` is ``question_text``
        followed by a "Technical Information" section naming the saved file,
        and ``path`` is where it was saved. Otherwise ``(question_text, None)``.
    """
    file_download_url = f"{api_url}/files/{task_id}"
    print(f"Attempting to download file for task {task_id} from {file_download_url}")
    try:
        response = requests.get(file_download_url, timeout=30)
        if response.status_code == 404:
            print(f"No file found for task {task_id} (404). Proceeding without file.")
            return question_text, None
        response.raise_for_status()  # Raise an exception for other bad status codes (4xx, 5xx)
    except requests.exceptions.RequestException as exc:
        print(f"Error downloading file for task {task_id}: {exc}. Proceeding without file.")
        return question_text, None

    filename = _choose_download_filename(response.headers, task_id)
    temp_storage_dir = Path(tempfile.gettempdir()) / "hf_space_agent_files"
    # Path(filename).name guarantees only a bare filename is joined to the directory.
    local_file_path = temp_storage_dir / Path(filename).name
    try:
        # Creating the directory is inside the try so that a failure here is
        # reported the same way as a failed write.
        temp_storage_dir.mkdir(parents=True, exist_ok=True)
        local_file_path.write_bytes(response.content)
    except OSError as e:
        print(f"Error saving file {local_file_path} for task {task_id}: {e}")
        return question_text, None  # Saving failed

    print(f"File for task {task_id} saved to: {local_file_path}")
    amended_question = (
        f"{question_text}\n\n"
        f"--- Technical Information ---\n"
        f"A file relevant to this task was downloaded and is available to your tools at the following local path. "
        f"Your tools that can read local files (like read_file, extract_text_from_image, etc.) should use this path:\n"
        f"Local file path: {str(local_file_path)}\n"
        f"--- End Technical Information ---\n\n"
    )
    return amended_question, local_file_path