Final_Assignment_Template

Sleeping

App Files Files Community

abhi1294 committed 29 days ago

Commit

5b48ced

1 Parent(s): 64f6aad

Fix prompts and utils

Browse files

Files changed (1) hide show

tools.py +33 -268

tools.py CHANGED Viewed

@@ -1,224 +1,38 @@
-# from __future__ import annotations
-# import io
-# import json
-# import os
-# from pathlib import Path
-# from typing import Optional
-# import pandas as pd
-# import requests
-# class TaskFileTool:
-#     """
-#     Downloads and reads task-linked files from the Hugging Face
-#     Unit 4 scoring API.
-#     Supported text extraction:
-#     - txt
-#     - csv
-#     - json
-#     - md
-#     - html
-#     - xml
-#     For unsupported or binary files, it safely returns an empty string for now.
-#     We can extend this later for PDF/images if needed.
-#     """
-#     def __init__(self, api_base_url: str, cache_dir:str = "task_files", timeout: int =30):
-#         self.api_base_url = api_base_url.rstrip("/")
-#         self.cache_dir = Path(cache_dir)
-#         self.cache_dir.mkdir(parents=True, exist_ok=True)
-#         self.timeout = timeout
-#     def get_task_context(self, task_id: str) -> str:
-#         """
-#         Main entry point used by the agent:
-#         1. download the task file if present
-#         2. read it into text context if supported
-#         """
-#         file_path = self.download_task_file(task_id)
-#         if file_path is None:
-#             return ""
-#         return self.read_file_as_text(file_path)
-#     def download_task_file(self, task_id: str) -> Optional[Path]:
-#         """
-#         Downloads the file linked to a task_id using:
-#         GET /files/{task_id}
-#         Returns:
-#             Path to saved file if successful, else None
-#         """
-#         url = f"{self.api_base_url}/files/{task_id}"
-#         try:
-#             response = requests.get(url, timeout=self.timeout)
-#         except requests.RequestException:
-#             return None
-#         if response.status_code !=200:
-#             return None
-#         filename = self._infer_filename(response=response, task_id=task_id)
-#         file_path = self.cache_dir / filename
-#         try:
-#             with open(file_path, "wb") as f:
-#                 f.write(response.content)
-#             return file_path
-#         except OSError:
-#             return None
-#         return file_path
-#     def read_file_as_text(self, file_path: Path) -> str:
-#         """
-#         Reads supported file types into plain text.
-#         """
-#         suffix = file_path.suffix.lower()
-#         try:
-#             if suffix in {".txt", ".md", ".html", ".xml", ".csv", ".json"}:
-#                 return self._read_supported_text_file(file_path, suffix)
-#             # common fallback for files saved without extension but actually text
-#             if suffix == "":
-#                 return self._read_extensionless_file(file_path)
-#             return ""
-#         except Exception:
-#             return ""
-#     def _read_supported_text_file(self, file_path: Path, suffix: str) -> str:
-#         if suffix in {".txt", ".md", ".html", ".xml"}:
-#             return file_path.read_text(encoding="utf-8", errors="ignore")
-#         if suffix == ".json":
-#             raw = file_path.read_text(encoding="utf-8", errors="ignore")
-#             try:
-#                 parsed = json.loads(raw)
-#                 return json.dumps(parsed, indent=2, ensure_ascii=False)
-#             except json.JSONDecodeError:
-#                 return raw
-#         if suffix == ".csv":
-#             try:
-#                 df = pd.read_csv(file_path)
-#                 return df.to_csv(index=False)
-#             except Exception:
-#                 return file_path.read_text(encoding="utf-8", errors="ignore")
-#         return ""
-#     def _read_extensionless_file(self, file_path: Path) -> str:
-#         """
-#         Try to interpret extensionless files as utf-8 text first.
-#         """
-#         try:
-#             raw = file_path.read_text(encoding="utf-8", errors="ignore")
-#             if raw.strip():
-#                 return raw
-#         except Exception:
-#             pass
-#         return ""
-#     def _infer_filename(self, response: requests.Response, task_id: str) -> str:
-#         """
-#         Attempts to infer a useful filename from headers.
-#         Falls back to task_id if no filename is available.
-#         """
-#         content_disposition = response.headers.get("content-disposition", "")
-#         filename = self._extract_filename_from_content_disposition(content_disposition)
-#         if filename:
-#             return self._safe_filename(filename)
-#         content_type = response.headers.get("content-type", "").lower()
-#         extension = self._extension_from_content_type(content_type)
-#         if extension:
-#             return f"{task_id}{extension}"
-#         return str(task_id)
-#     @staticmethod
-#     def _extract_filename_from_content_disposition(content_disposition: str) -> Optional[str]:
-#         """
-#         Example header:
-#         content-disposition: attachment; filename="example.csv"
-#         """
-#         if "filename=" not in content_disposition:
-#             return None
-#         try:
-#             filename = content_disposition.split("filename=")[-1].strip().strip('"')
-#             return filename or None
-#         except Exception:
-#             return None
-#     @staticmethod
-#     def _extension_from_content_type(content_type: str) -> str:
-#         mapping = {
-#             "text/plain": ".txt",
-#             "text/csv": ".csv",
-#             "application/csv": ".csv",
-#             "application/json": ".json",
-#             "text/markdown": ".md",
-#             "text/html": ".html",
-#             "application/xml": ".xml",
-#             "text/xml": ".xml",
-#         }
-#         for key, ext in mapping.items():
-#             if key in content_type:
-#                 return ext
-#         return ""
-#     @staticmethod
-#     def _safe_filename(filename: str) -> str:
-#         """
-#         Prevent path traversal and weird path issues.
-#         """
-#         return os.path.basename(filename)
 from __future__ import annotations
 import json
 import os
 from pathlib import Path
 from typing import Optional
 import pandas as pd
 import requests
 class TaskFileTool:
     """
-    Downloads and reads task-linked files from the Hugging Face Unit 4 scoring API.
-    Supported extraction:
     - txt
     - md
     - html
     - xml
-    - csv
-    - tsv
-    - json
-    - xlsx
-    - xls
-    For unsupported or binary files, it safely returns an empty string.
     """
-    def __init__(self, api_base_url: str, cache_dir: str = "task_files", timeout: int = 30):
         self.api_base_url = api_base_url.rstrip("/")
         self.cache_dir = Path(cache_dir)
         self.cache_dir.mkdir(parents=True, exist_ok=True)
         self.timeout = timeout
     def get_task_context(self, task_id: str) -> str:
         """
-        Main entry point:
         1. download the task file if present
         2. read it into text context if supported
         """
@@ -231,19 +45,20 @@ class TaskFileTool:
         """
         Downloads the file linked to a task_id using:
         GET /files/{task_id}
         """
-        url = f"{self.api_base_url}/files/{task_id}"
         try:
             response = requests.get(url, timeout=self.timeout)
-        except requests.RequestException as e:
-            print(f"Task file download error for {task_id}: {e}")
             return None
-        if response.status_code != 200:
-            print(f"No task file for {task_id}. Status code: {response.status_code}")
             return None
         filename = self._infer_filename(response=response, task_id=task_id)
         file_path = self.cache_dir / filename
@@ -251,10 +66,10 @@ class TaskFileTool:
             with open(file_path, "wb") as f:
                 f.write(response.content)
             return file_path
-        except OSError as e:
-            print(f"Failed to save task file for {task_id}: {e}")
             return None
     def read_file_as_text(self, file_path: Path) -> str:
         """
         Reads supported file types into plain text.
@@ -262,19 +77,15 @@ class TaskFileTool:
         suffix = file_path.suffix.lower()
         try:
-            if suffix in {".txt", ".md", ".html", ".xml", ".csv", ".tsv", ".json"}:
                 return self._read_supported_text_file(file_path, suffix)
-            if suffix in {".xlsx", ".xls"}:
-                return self._read_excel_file(file_path)
             if suffix == "":
                 return self._read_extensionless_file(file_path)
-            print(f"Unsupported file type: {suffix} for {file_path.name}")
             return ""
-        except Exception as e:
-            print(f"Failed to read file {file_path.name}: {e}")
             return ""
     def _read_supported_text_file(self, file_path: Path, suffix: str) -> str:
@@ -292,62 +103,12 @@ class TaskFileTool:
         if suffix == ".csv":
             try:
                 df = pd.read_csv(file_path)
-                return self._dataframe_to_context(df, file_path.name)
-            except Exception:
-                return file_path.read_text(encoding="utf-8", errors="ignore")
-        if suffix == ".tsv":
-            try:
-                df = pd.read_csv(file_path, sep="\t")
-                return self._dataframe_to_context(df, file_path.name)
             except Exception:
                 return file_path.read_text(encoding="utf-8", errors="ignore")
         return ""
-    def _read_excel_file(self, file_path: Path) -> str:
-        """
-        Reads Excel files and formats all sheets into a compact text context.
-        """
-        try:
-            excel_file = pd.ExcelFile(file_path)
-            sheet_texts = []
-            for sheet_name in excel_file.sheet_names:
-                try:
-                    df = pd.read_excel(file_path, sheet_name=sheet_name)
-                    rendered = self._dataframe_to_context(df, f"{file_path.name}::{sheet_name}")
-                    sheet_texts.append(f"Sheet: {sheet_name}\n{rendered}")
-                except Exception as e:
-                    sheet_texts.append(f"Sheet: {sheet_name}\n[Unreadable sheet: {e}]")
-            return "\n\n".join(sheet_texts).strip()
-        except Exception as e:
-            print(f"Excel read error for {file_path.name}: {e}")
-            return ""
-    def _dataframe_to_context(self, df: pd.DataFrame, label: str) -> str:
-        """
-        Convert a dataframe into a more LLM-friendly text block.
-        """
-        if df is None or df.empty:
-            return f"File: {label}\n[Empty table]"
-        preview_rows = min(len(df), 50)
-        preview_df = df.head(preview_rows).copy()
-        # Normalize NaN for readability
-        preview_df = preview_df.fillna("")
-        columns = ", ".join(str(c) for c in preview_df.columns.tolist())
-        return (
-            f"File: {label}\n"
-            f"Columns: {columns}\n"
-            f"Row count preview: {preview_rows}\n"
-            f"Data preview:\n{preview_df.to_csv(index=False)}"
-        )
     def _read_extensionless_file(self, file_path: Path) -> str:
         """
         Try to interpret extensionless files as utf-8 text first.
@@ -381,6 +142,10 @@ class TaskFileTool:
     @staticmethod
     def _extract_filename_from_content_disposition(content_disposition: str) -> Optional[str]:
         if "filename=" not in content_disposition:
             return None
@@ -396,14 +161,11 @@ class TaskFileTool:
             "text/plain": ".txt",
             "text/csv": ".csv",
             "application/csv": ".csv",
-            "text/tab-separated-values": ".tsv",
             "application/json": ".json",
             "text/markdown": ".md",
             "text/html": ".html",
             "application/xml": ".xml",
             "text/xml": ".xml",
-            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
-            "application/vnd.ms-excel": ".xls",
         }
         for key, ext in mapping.items():
@@ -414,4 +176,7 @@ class TaskFileTool:
     @staticmethod
     def _safe_filename(filename: str) -> str:
         return os.path.basename(filename)

 from __future__ import annotations
+import io
 import json
 import os
 from pathlib import Path
 from typing import Optional
 import pandas as pd
 import requests
 class TaskFileTool:
     """
+    Downloads and reads task-linked files from the Hugging Face
+    Unit 4 scoring API.
+    Supported text extraction:
     - txt
+    - csv
+    - json
     - md
     - html
     - xml
+    For unsupported or binary files, it safely returns an empty string for now.
+    We can extend this later for PDF/images if needed.
     """
+    def __init__(self, api_base_url: str, cache_dir:str = "task_files", timeout: int =30):
         self.api_base_url = api_base_url.rstrip("/")
         self.cache_dir = Path(cache_dir)
         self.cache_dir.mkdir(parents=True, exist_ok=True)
         self.timeout = timeout
     def get_task_context(self, task_id: str) -> str:
         """
+        Main entry point used by the agent:
         1. download the task file if present
         2. read it into text context if supported
         """
         """
         Downloads the file linked to a task_id using:
         GET /files/{task_id}
+        Returns:
+            Path to saved file if successful, else None
         """
+        url = f"{self.api_base_url}/file/{task_id}"
         try:
             response = requests.get(url, timeout=self.timeout)
+        except requests.RequestException:
             return None
+        if response.status_code !=200:
             return None
         filename = self._infer_filename(response=response, task_id=task_id)
         file_path = self.cache_dir / filename
             with open(file_path, "wb") as f:
                 f.write(response.content)
             return file_path
+        except OSError:
             return None
+        return file_path
     def read_file_as_text(self, file_path: Path) -> str:
         """
         Reads supported file types into plain text.
         suffix = file_path.suffix.lower()
         try:
+            if suffix in {".txt", ".md", ".html", ".xml", ".csv", ".json"}:
                 return self._read_supported_text_file(file_path, suffix)
+            # common fallback for files saved without extension but actually text
             if suffix == "":
                 return self._read_extensionless_file(file_path)
             return ""
+        except Exception:
             return ""
     def _read_supported_text_file(self, file_path: Path, suffix: str) -> str:
         if suffix == ".csv":
             try:
                 df = pd.read_csv(file_path)
+                return df.to_csv(index=False)
             except Exception:
                 return file_path.read_text(encoding="utf-8", errors="ignore")
         return ""
     def _read_extensionless_file(self, file_path: Path) -> str:
         """
         Try to interpret extensionless files as utf-8 text first.
     @staticmethod
     def _extract_filename_from_content_disposition(content_disposition: str) -> Optional[str]:
+        """
+        Example header:
+        content-disposition: attachment; filename="example.csv"
+        """
         if "filename=" not in content_disposition:
             return None
             "text/plain": ".txt",
             "text/csv": ".csv",
             "application/csv": ".csv",
             "application/json": ".json",
             "text/markdown": ".md",
             "text/html": ".html",
             "application/xml": ".xml",
             "text/xml": ".xml",
         }
         for key, ext in mapping.items():
     @staticmethod
     def _safe_filename(filename: str) -> str:
+        """
+        Prevent path traversal and weird path issues.
+        """
         return os.path.basename(filename)