yushnitp committed on
Commit
698c93a
·
verified ·
1 Parent(s): d0a66d9

Create agent_file_parser.py

Browse files
Files changed (1) hide show
  1. agent_file_parser.py +36 -0
agent_file_parser.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import mimetypes
3
+ import tempfile
4
+ import fitz # PyMuPDF for PDF
5
+ import pandas as pd
6
+ import openpyxl
7
+ import ast
8
+
9
# Base URL of the service that task attachments are downloaded from.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Extensions that fetch_task_file_context has a dedicated branch for.
_SUPPORTED_EXTENSIONS = frozenset({".pdf", ".csv", ".xls", ".xlsx", ".py", ".mp3"})


def _guess_extension(file_name, content_type):
    """Return a lowercase file extension from the Content-Type, else from file_name."""
    import os

    # guess_extension() only matches bare MIME types, so drop parameters
    # such as "; charset=utf-8" before the lookup.
    mime_type = (content_type or "").split(";", 1)[0].strip().lower()
    guessed = mimetypes.guess_extension(mime_type) if mime_type else None
    if guessed in _SUPPORTED_EXTENSIONS:
        return guessed

    # The header was missing, generic, or mapped to something we cannot parse:
    # fall back to the extension of the declared file name.
    from_name = os.path.splitext(file_name or "")[1].lower()
    return from_name or guessed


def fetch_task_file_context(task_id, file_name):
    """Download the file attached to a task and return its content as text.

    Args:
        task_id: Identifier interpolated into ``{DEFAULT_API_URL}/files/{task_id}``.
        file_name: Declared name of the attachment. Its extension is used when
            the response's Content-Type does not identify a supported format.

    Returns:
        The extracted text for PDF, CSV, Excel and Python files, or one of the
        bracketed placeholder strings when the file cannot be retrieved, is an
        MP3, or has an unsupported format.
    """
    import io

    file_url = f"{DEFAULT_API_URL}/files/{task_id}"
    try:
        response = requests.get(file_url, timeout=15)
    except requests.RequestException:
        # Timeouts and connection errors are reported the same way as a
        # non-200 answer instead of crashing the caller.
        return "[File could not be retrieved]"
    if response.status_code != 200:
        return "[File could not be retrieved]"

    extension = _guess_extension(file_name, response.headers.get("Content-Type", ""))
    payload = response.content

    # Everything is parsed from memory: reopening a still-open
    # NamedTemporaryFile by name does not work on Windows.
    if extension == ".pdf":
        with fitz.open(stream=payload, filetype="pdf") as document:
            return "\n".join(page.get_text() for page in document)
    if extension == ".csv":
        return pd.read_csv(io.BytesIO(payload)).to_string()
    if extension in (".xls", ".xlsx"):
        return pd.read_excel(io.BytesIO(payload)).to_string()
    if extension == ".py":
        # Decode explicitly rather than relying on the platform's locale encoding.
        return payload.decode("utf-8", errors="replace")
    if extension == ".mp3":
        return "[Audio file: Speech-to-text not yet implemented]"
    return "[Unsupported file format]"