zaradana commited on
Commit
d1479d0
·
verified ·
1 Parent(s): 81917a3

Upload 2 files (#1)

Browse files

- Upload 2 files (dc3177f056c92ec22fa26a0fc46247ee7fb4b443)

Files changed (2) hide show
  1. tools.py +126 -0
  2. utils.py +55 -0
tools.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from smolagents import Tool, tool
3
+ from huggingface_hub import HfApi
4
+ from dotenv import load_dotenv
5
+ from huggingface_hub import InferenceClient
6
+ from utils import upload_file
7
+ load_dotenv()
8
+ HF_TOKEN = os.environ.get("HF_TOKEN")
9
+
10
+ api = HfApi()
11
+ client = InferenceClient(
12
+ provider="hf-inference",
13
+ api_key=HF_TOKEN,
14
+ )
15
+
16
+ # --- Constants ---
17
+ local_data_path = "../data"
18
+
19
+ if not os.path.exists(local_data_path):
20
+ os.makedirs(local_data_path)
21
+
22
+
23
+
24
@tool
def image_question_answering(image_path: str, prompt: str) -> str:
    """
    Answer a question about an image using a hosted vision-language model.

    Args:
        image_path: Local path or HTTP(S) URL of the image file.
        prompt: The question to ask about the image.

    Returns:
        The model's answer as a string, or an error message for video input.
    """
    # os.path.splitext keeps the leading dot, so the membership test below
    # actually matches. The original split(".")[-1] dropped the dot and the
    # video rejection could never trigger.
    file_extension = os.path.splitext(image_path)[1].lower()
    if file_extension in [".mp4", ".avi", ".mov", ".wmv", ".mkv", ".webm"]:
        return "Media type not supported. Please upload an image."

    if image_path.startswith("http"):
        media_url = image_path
    else:
        # Local files must be uploaded so the inference API can fetch them.
        media_url = upload_file(image_path)

    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": prompt,
                },
                {
                    "type": "image_url",
                    "image_url": {"url": media_url},
                },
            ],
        }
    ]

    completion = client.chat.completions.create(
        model="meta-llama/Llama-3.2-11B-Vision-Instruct",
        messages=messages,
    )
    # Return the text content rather than the message object so the
    # declared `-> str` return type holds.
    return completion.choices[0].message.content
65
+
66
+
67
@tool
def transcribe_audio(file_local_path: str) -> str:
    """
    Transcribe an audio file and return its transcript.

    Args:
        file_local_path: The local path to the audio file.

    Returns:
        The transcript of the audio file.
    """
    # The Whisper Space needs a publicly reachable URL, so push the file first.
    audio_url = upload_file(file_local_path)

    whisper_space = Tool.from_space(
        "hf-audio/whisper-large-v3",
        api_name="/predict_1",  # from file
        name="transcribe_audio",
        description="Use this tool to transcribe the audio"
    )

    return whisper_space(audio_url)
87
+
88
+
89
class GetFileTool(Tool):
    """Download a task file from the scoring API into the local data directory."""

    name = "get_file"
    description = "Download a file from the given file name"
    inputs = {
        "file_name": {
            "type": "string",
            "description": "Download the file from the given file name and outputs the local path"
        }
    }
    output_type = "string"

    def forward(self, file_name: str) -> str:
        """
        Fetch the file identified by file_name and save it locally.

        Args:
            file_name: File name of the form "<task_id>.<ext>".

        Returns:
            The local path of the saved file, or an error message string.
        """
        import requests

        if file_name == "":
            return "No file name provided"

        # The scoring endpoint is keyed by the task id (name without extension).
        task_id = file_name.split(".")[0]
        url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
        headers = {
            "accept": "application/json"
        }
        # A timeout prevents the agent from hanging forever on a dead endpoint.
        req = requests.get(url, headers=headers, timeout=30)

        if req.status_code != 200:
            return "File not found, please check the file name and try again."

        local_file_path = os.path.join(local_data_path, file_name)

        with open(local_file_path, "wb") as f:
            f.write(req.content)

        print(f"File saved to {local_file_path}. You can read this file to process its contents.")
        return local_file_path
124
+
125
+
126
+
utils.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

from dotenv import load_dotenv
from huggingface_hub import HfApi
from smolagents import OpenAIServerModel

# Pull credentials from a local .env file before reading the key.
load_dotenv()
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Dataset repo used as scratch space for uploaded files.
repo_id = "zaradana/temp_files"

api = HfApi()
12
def check_asnwer_format(final_answer, agent_memory):
    """
    Ask a judge model whether final_answer matches the task's requested format.

    Args:
        final_answer: The candidate answer produced by the agent.
        agent_memory: Agent memory object exposing get_succinct_steps().

    Returns:
        True when the judge's verdict does not contain "FAIL".

    Raises:
        Exception: With the judge's full feedback when the verdict fails.
    """
    judge = OpenAIServerModel("gpt-4o", max_tokens=8096, api_key=OPENAI_API_KEY)

    review_prompt = (
        f"Here is a user-given task and the agent steps: {agent_memory.get_succinct_steps()}. "
        f"Here is the final answer: {final_answer}. "
        "Please check that the answer is in the requested format. "
        "First list reasons why yes/no, then write your final decision: PASS in caps lock if it is satisfactory, FAIL if it is not."
    )
    verdict = judge(
        [
            {
                "role": "user",
                "content": [{"type": "text", "text": review_prompt}],
            }
        ]
    ).content

    print("Feedback: ", verdict)
    if "FAIL" in verdict:
        raise Exception(verdict)
    return True
36
+
37
+
38
def upload_file(file_local_path: str) -> str:
    """
    Upload a file to the Hugging Face Hub and return the URL.

    Args:
        file_local_path: The local path to the file.

    Returns:
        The URL of the uploaded file.
    """
    # Everything after the last "/" becomes the name inside the repo
    # (the whole path when there is no slash, same as split("/")[-1]).
    file_name = file_local_path.rpartition("/")[-1]

    api.upload_file(
        path_or_fileobj=file_local_path,
        path_in_repo=file_name,
        repo_id=repo_id,
        repo_type="dataset",
    )

    return f"https://huggingface.co/datasets/{repo_id}/resolve/main/{file_name}"