File size: 2,176 Bytes
860424e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61

import json
import csv
import openpyxl
import whisper
import os
import requests
from smolagents.tools import tool

# Base URL of the remote API; `_download_file` requests files that are missing
# locally from `<DEFAULT_API_URL>/files/<id>`.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

def _download_file(file_name: str) -> None:
    """Fetch ``file_name`` from the remote API unless it already exists locally.

    The remote identifier is the base name of ``file_name`` without its
    extension; it is requested from ``{DEFAULT_API_URL}/files/<identifier>``
    and the response body is written to ``file_name``.

    Args:
        file_name: Local path to check and, if absent, to write the
            downloaded bytes to.

    Raises:
        requests.RequestException: If the request cannot be completed
            (connection failure, timeout) or the server answers with an
            error status. Nothing is written to disk in that case.
    """
    if os.path.exists(file_name):
        return

    # Derive the identifier from the stem of the base name so directory
    # components, extra dots, or a missing extension can neither corrupt the
    # URL nor raise IndexError.
    file_id = os.path.splitext(os.path.basename(file_name))[0]
    url = f"{DEFAULT_API_URL}/files/{file_id}"

    # A timeout keeps an unresponsive server from hanging the caller forever.
    response = requests.get(url, timeout=30)
    # Fail before writing: otherwise an HTTP error page would be saved under
    # file_name and later be mistaken for a valid cached download.
    response.raise_for_status()

    with open(file_name, "wb") as f:
        f.write(response.content)

@tool
def read_file_as_text(file_name: str) -> str:
    """
    Opens a file and returns its content as readable text.
    Supports 'txt', 'py', 'json', 'csv', 'xlsx', and 'mp3' (for mp3, it transcribes speech to text).
    The file type is taken from the file extension, ignoring case. If the file is not available locally, a download is attempted first.
    Failures are reported as a descriptive message in the returned text instead of being raised.
    Args:
        file_name (str): The path or name of the file.
    Returns:
        str: The content of the file as text, transcribed speech if 'mp3', or an error message.
    """
    # splitext only inspects the final path component, so dots in directory
    # names are not mistaken for an extension; lower() accepts e.g. "DATA.CSV".
    file_type = os.path.splitext(file_name)[1].lstrip(".").lower()
    try:
        # Kept inside the try so that download failures are reported through
        # the same message-returning path as read failures.
        _download_file(file_name)

        # "utf-8-sig" decodes plain UTF-8 unchanged but also strips a leading
        # BOM, which would otherwise break json.load and pollute the first
        # cell / first line of csv and text files.
        if file_type in {"txt", "py"}:
            with open(file_name, "r", encoding="utf-8-sig") as f:
                return f.read()
        elif file_type == "json":
            with open(file_name, "r", encoding="utf-8-sig") as f:
                data = json.load(f)
            # ensure_ascii=False keeps non-ASCII text readable instead of
            # emitting \uXXXX escapes.
            return json.dumps(data, indent=2, ensure_ascii=False)
        elif file_type == "csv":
            # newline="" lets the csv module handle line endings itself, which
            # is required for quoted fields containing embedded newlines.
            with open(file_name, "r", encoding="utf-8-sig", newline="") as f:
                return "\n".join(", ".join(row) for row in csv.reader(f))
        elif file_type == "xlsx":
            # data_only=True yields cached cell values rather than formulas.
            wb = openpyxl.load_workbook(file_name, data_only=True)
            sheet = wb.active
            # Only the active sheet is rendered; empty cells become "".
            return "\n".join(
                ", ".join(str(cell) if cell is not None else "" for cell in row)
                for row in sheet.iter_rows(values_only=True)
            )
        elif file_type == "mp3":
            model = whisper.load_model("base")
            result = model.transcribe(file_name)
            return result["text"]
        else:
            return f"File type '{file_type}' not supported."
    except FileNotFoundError:
        return f"File '{file_name}' not found."
    except Exception as e:
        # Tool boundary: hand the failure back as text rather than raising.
        return f"Error opening file '{file_name}': {str(e)}"