File size: 8,658 Bytes
466a315
 
 
 
 
 
 
 
 
 
 
 
 
 
db622b0
 
466a315
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a45a161
466a315
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c216cbf
466a315
c216cbf
 
 
466a315
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a45a161
466a315
 
 
 
 
 
 
 
 
 
 
a45a161
466a315
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
db622b0
 
466a315
 
 
 
 
 
 
 
 
a45a161
466a315
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c5fcf0
466a315
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a45a161
466a315
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
import os
import pandas as pd
from smolagents import (
    CodeAgent, 
    LiteLLMModel, 
    DuckDuckGoSearchTool, 
    FinalAnswerTool,
    VisitWebpageTool,
    WikipediaSearchTool,
    WebSearchTool,
    tool,
    OpenAIServerModel
)
from langchain_community.document_loaders import ArxivLoader
from google.colab import userdata
    
import requests
import yaml
from dotenv import load_dotenv
load_dotenv()


def fetch_questions():
    """Fetch the evaluation questions from the scoring API.

    Sends a GET request to ``{DEFAULT_API_URL}/questions`` and decodes the
    JSON body of the response.

    Returns:
        The decoded JSON payload (callers iterate over it as a sequence of
        question dicts). When the payload is empty an empty list is
        returned, so the result is always safe to iterate.

    Raises:
        Exception: Any error raised while performing the request or while
            decoding the response is printed and then re-raised unchanged.
    """
    DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        print(f"Error fetching questions: {e}")
        # Bare raise keeps the original traceback intact.
        raise

    if not questions_data:
        print("Fetched questions list is empty.")
        # Keep the return type uniform: callers loop over the result, and a
        # (message, None) tuple would be iterated as if it were questions.
        return []

    print(f"Fetched {len(questions_data)} questions.")
    return questions_data
    
def fetch_file(task_id: str, file_name: str):
    """Download the attachment of a task and store it on disk.

    The file is requested from ``{DEFAULT_API_URL}/files/{task_id}`` and
    written to ``data/question_files/{file_name}``.

    Args:
        task_id: Identifier of the task whose attachment is requested.
        file_name: Name under which the downloaded bytes are stored inside
            ``data/question_files``.

    Returns:
        The raw bytes of the downloaded file.

    Raises:
        Exception: Any error raised while downloading or writing the file
            is printed and then re-raised unchanged.
    """
    DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
    target_path = f"data/question_files/{file_name}"
    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=30)
        response.raise_for_status()
        file_content = response.content

        # The download directory is not guaranteed to exist on a fresh
        # checkout; create it so the write below cannot fail for that reason.
        os.makedirs(os.path.dirname(target_path), exist_ok=True)
        with open(target_path, "wb") as f:
            f.write(file_content)
        return file_content
    except Exception as e:
        print(f"Error fetching file: {e}")
        # Bare raise keeps the original traceback intact.
        raise
    

def submit_answers(answers, username="GoReed", agent_code="test"):
    """Submit the collected answers to the scoring API.

    Posts a JSON payload with the keys ``username``, ``agent_code`` and
    ``answers`` to ``{DEFAULT_API_URL}/submit``.

    Args:
        answers: The answers to submit; sent as-is under the ``answers``
            key of the payload.
        username: Value sent under the ``username`` key.
        agent_code: Value sent under the ``agent_code`` key.

    Returns:
        The decoded JSON response on success, or ``None`` when the request
        or the decoding of the response failed (the error is printed).
    """
    DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
    request_payload = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers,
    }
    try:
        # `json=` lets requests serialize the payload and set the header.
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.post(
            f"{DEFAULT_API_URL}/submit",
            json=request_payload,
            timeout=60,
        )
        response.raise_for_status()
        json_response = response.json()
        print(f"Response: {json_response}")
        return json_response
    except Exception as e:
        # Submission is best-effort: report the failure and signal it to
        # the caller through an explicit None instead of raising.
        print(f"Error submitting answers: {e}")
        return None

@tool
def arxiv_search(query: str) -> str:
    """Search Arxiv for a query and return maximum 3 result.
    Args:
        query: The search query."""
    # NOTE: the docstring above is left verbatim; smolagents' @tool builds
    # the tool description shown to the model from it.
    search_docs = ArxivLoader(query=query, load_max_docs=3).load()
    if not search_docs:
        return "No Arxiv results found."

    formatted_docs = []
    for doc in search_docs:
        metadata = doc.metadata
        # A plain metadata["source"] lookup raises KeyError when the loader
        # does not populate that key, so fall back to other identifiers.
        # NOTE(review): which keys ArxivLoader fills depends on the installed
        # langchain_community version - confirm.
        source = (
            metadata.get("source")
            or metadata.get("entry_id")
            or metadata.get("Title", "")
        )
        page = metadata.get("page", "")
        # Only the first 1000 characters of each document are included.
        # The opening tag is not self-closed, so it pairs with </Document>.
        formatted_docs.append(
            f'<Document source="{source}" page="{page}">\n'
            f"{doc.page_content[:1000]}\n</Document>"
        )

    # Return plain text to honour the declared ``-> str`` return type.
    return "\n\n---\n\n".join(formatted_docs)

@tool
def read_python_file(file_name: str) -> str:
    """Read a python file and return the content.
    Args:
        file_name: The name of the file to read.
    Returns:
        The content of the file.
    """
    # The file is looked up inside the download directory, not the CWD.
    base_path = "data/question_files"
    # Python source is UTF-8 by default; decode explicitly rather than
    # relying on the platform's locale encoding.
    with open(os.path.join(base_path, file_name), "r", encoding="utf-8") as f:
        return f.read()

@tool
def read_excel_file(file_name: str) -> str:
    """Read an excel file with xlsx extension and return the content.
    Args:
        file_name: The name of the file to handle.
    Returns:
        The content of the file.
    """
    # NOTE: the docstring is left verbatim; smolagents' @tool builds the
    # tool description shown to the model from it.
    # Resolve the workbook inside the download directory, load it with
    # pandas and render the resulting frame as plain text.
    workbook_path = os.path.join("data/question_files", file_name)
    return pd.read_excel(workbook_path).to_string()

@tool
def extract_text_from_image(image_path: str) -> str:
    """
    Extract text from an image using pytesseract (if available).

    Args:
        image_path: Path to the image file

    Returns:
        Extracted text or error message
    """
    try:
        # Imported lazily so a missing OCR dependency is reported as a
        # message instead of breaking the import of this module.
        import pytesseract
        from PIL import Image

        # Use the image as a context manager so its file handle is closed
        # once OCR is done, including when OCR raises.
        with Image.open(image_path) as image:
            text = pytesseract.image_to_string(image)

        print(f"Extracted text from image:\n\n{text}")
        return f"Extracted text from image:\n\n{text}"
    except ImportError:
        return "Error: pytesseract is not installed. Please install it with 'pip install pytesseract' and ensure Tesseract OCR is installed on your system."
    except Exception as e:
        # Failures are returned as text rather than raised, so the caller
        # always receives a string.
        return f"Error extracting text from image: {str(e)}"

# Model configuration.
# MODEL_ID is also used further down to name the answers file; the part
# after the last "/" is the model name handed to the OpenAI client.
MODEL_ID = "openai/gpt-4.1-nano"

# The key is read from the environment first (load_dotenv() above may have
# populated it) and falls back to the Colab secret store.
secret_value = userdata.get('OPENAI_API_KEY_AG')
API_KEY = os.getenv("OPENAI_API_KEY_AG") or secret_value
# Never print the key itself: stdout ends up in logs and notebook output.
if not API_KEY:
    print("Warning: OPENAI_API_KEY_AG is not set.")

# Alternative local configuration (Ollama through LiteLLM):
# MODEL_ID = "ollama_chat/qwen2.5-coder:7b"
# model = LiteLLMModel(
#     model_id=MODEL_ID,
#     api_base="http://127.0.0.1:11434",
#     num_ctx=8192,
# )
model = OpenAIServerModel(model_id=MODEL_ID.split("/")[-1], api_key=API_KEY)

# Loaded for optional use as CodeAgent(prompt_templates=...); the agent
# construction below currently has that argument commented out.
with open("system_prompt.yaml", 'r', encoding="utf-8") as stream:
    prompt_templates = yaml.safe_load(stream)

# Tools handed to the agent, in the order they are registered.
_agent_tools = [
    WebSearchTool(),
    VisitWebpageTool(),
    WikipediaSearchTool(),
    arxiv_search,
    FinalAnswerTool(),
    extract_text_from_image,
    #read_python_file,
    #read_excel_file
]

# Modules passed to the agent as additional_authorized_imports.
_authorized_imports = [
    "pandas",
    "numpy",
    "requests",
    "os",
    "math",
    "sympy",
    "scipy",
    "markdownify",
    "unicodedata",
    "stat",
    "datetime",
    "random",
    "itertools",
    "statistics",
    "queue",
    "time",
    "collections",
    "re",
]

# Build the code-writing agent around the configured model.
agent = CodeAgent(
    tools=_agent_tools,
    model=model,
    additional_authorized_imports=_authorized_imports,
    add_base_tools=True,
    max_steps=10,
    planning_interval=3,
    verbosity_level=-1,
    #prompt_templates=prompt_templates,
)
import json

# Shared pieces of the instruction prompt. The wording is identical for
# questions with and without an attachment, except for the sentence that
# announces the file.
_PROMPT_INTRO = (
    "You are a general AI assistant.You can use the provided tools and "
    "websearch for finding answers. "
)
_PROMPT_RULES = (
    "Report your thoughts, and finish your answer. YOUR FINAL ANSWER should "
    "be a number OR as few words as possible OR a comma separated list of "
    "numbers and/or strings. If you are asked for a number, don't use comma "
    "to write your number neither use units such as $ or percent sign unless "
    "specified otherwise. If you are asked for a string, don't use articles, "
    "neither abbreviations (e.g. for cities), and write the digits in plain "
    "text unless specified otherwise. If you are asked for a comma separated "
    "list, apply the above rules depending of whether the element to be put "
    "in the list is a number or a string."
)
# The question/file lines are indented by 16 spaces inside the prompt.
_PROMPT_FIELD_INDENT = " " * 16


def _build_prompt(question_text, file_path=None):
    """Return the agent prompt for a question, optionally naming a file."""
    if file_path is None:
        return (
            f"{_PROMPT_INTRO}I will ask you a question. {_PROMPT_RULES}\n"
            f"{_PROMPT_FIELD_INDENT}Question:{question_text}"
        )
    return (
        f"{_PROMPT_INTRO}I will ask you a question and provide you with a "
        f"file_name. {_PROMPT_RULES}\n"
        f"{_PROMPT_FIELD_INDENT}question:{question_text}\n"
        f"{_PROMPT_FIELD_INDENT}file_path:{file_path}"
    )


questions = fetch_questions()
# Stop early when there is nothing to iterate over: an empty or non-list
# result would otherwise fail inside the loop or submit an empty run.
if not isinstance(questions, list) or not questions:
    raise SystemExit("No questions fetched; nothing to answer or submit.")

answers = []
for question in questions:
    file_name = question.get('file_name')
    if file_name:
        # Download the attachment so the agent can open it by path.
        fetch_file(question['task_id'], file_name)
        file_path = os.path.join("data/question_files", file_name)
        prompt = _build_prompt(question['question'], file_path)
    else:
        prompt = _build_prompt(question['question'])

    answer = agent.run(prompt)
    print(f"Task ID: {question['task_id']} \nQuestion: {question['question']} \nAnswer: {answer}")
    print()
    answers.append(
        {
            "task_id": question['task_id'],
            "submitted_answer": answer
        }
    )

# Persist the answers before submitting so a failed submission does not
# lose the run; make sure the output directory exists first.
os.makedirs("data", exist_ok=True)
with open(f"data/answers_with_prompt_{MODEL_ID.split('/')[-1]}_with_file_content_handling.json", "w") as f:
    json.dump(answers, f, indent=2)

print("Submitting answers...")
# submit_answers returns None when the submission failed; only report
# success when a response was actually received.
if submit_answers(answers) is None:
    print("Answer submission failed")
else:
    print("Answers submitted successfully")