Spaces:
Sleeping
Sleeping
Create tools/FinalAnswerTool
Browse files- tools/FinalAnswerTool +42 -0
tools/FinalAnswerTool
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from smolagents import LiteLLMModel
|
| 2 |
+
from settings import settings
|
| 3 |
+
from utils import InputTokenRateLimiter
|
| 4 |
+
|
| 5 |
+
class FinalAnswerTool:
|
| 6 |
+
def __init__(self):
|
| 7 |
+
self.model = LiteLLMModel(
|
| 8 |
+
model_id=settings.llm_model_id,
|
| 9 |
+
api_key=settings.llm_api_key,
|
| 10 |
+
temperature=0.1,
|
| 11 |
+
max_tokens=20,
|
| 12 |
+
)
|
| 13 |
+
self.token_rate_limiter = InputTokenRateLimiter()
|
| 14 |
+
self.expected_tokens_per_step = 10000
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def forward(self):
|
| 18 |
+
self.token_rate_limiter.maybe_wait(self.expected_tokens_per_step)
|
| 19 |
+
response = self.model.generate(
|
| 20 |
+
[
|
| 21 |
+
{ "role": "system",
|
| 22 |
+
"content": [
|
| 23 |
+
{
|
| 24 |
+
"type": "text",
|
| 25 |
+
"text": (
|
| 26 |
+
"You are an assistant that answers questions with exactly one word. "
|
| 27 |
+
"Always choose the most direct, unambiguous word. "
|
| 28 |
+
"Do NOT use sentences. If the answer is unknown or invalid, respond 'N/A'."
|
| 29 |
+
),
|
| 30 |
+
}
|
| 31 |
+
],
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"role": "user",
|
| 35 |
+
"content": "Generate a final answer to the question given the context provided. Only use the context to provide the final answer. Do not provide any additional thinking or commentary in the response, the final answer only.",
|
| 36 |
+
}
|
| 37 |
+
]
|
| 38 |
+
)
|
| 39 |
+
tokens_used = getattr(response, "token_usage", None)
|
| 40 |
+
if tokens_used:
|
| 41 |
+
self.token_rate_limiter.add_tokens(tokens_used.input_tokens)
|
| 42 |
+
return response.content
|