Spaces: Sleeping
# Standard library
import os

# Third-party
import gradio as gr
from huggingface_hub import InferenceClient

# Read the Hugging Face API token from the environment; a plain indexed
# lookup raises KeyError immediately if HF_TOKEN is unset.
token = os.environ["HF_TOKEN"]

# Serverless Inference API client pinned to a small instruct-tuned Llama.
client = InferenceClient("meta-llama/Llama-3.2-3B-Instruct", token=token)
# ReAct-style system prompt for the dummy agent loop below: the model is told
# to emit Thought / Action (a JSON blob) / Observation steps and finish with
# "Final Answer:".
# NOTE(review): the doubled braces around the example blob ({{ ... }}) look
# like a leftover from a str.format()-based template; this string is used
# verbatim in an f-string below, so the model literally sees "{{" — confirm
# this is intended.
# NOTE(review): the markdown code fence opened before the example blob is
# never closed before "ALWAYS use the following format:" — verify the prompt
# reads as intended.
SYSTEM_PROMPT = """Answer the following questions as best you can. You have access to the following tools:
get_weather: Get the current weather in a given location
The way you use the tools is by specifying a json blob.
Specifically, this json should have a `action` key (with the name of the tool to use) and a `action_input` key (with the input to the tool going here).
The only values that should be in the "action" field are:
get_weather: Get the current weather in a given location, args: {"location": {"type": "string"}}
example use :
```
{{
"action": "get_weather",
"action_input": {"location": "New York"}
}}
ALWAYS use the following format:
Question: the input question you must answer
Thought: you should always think about one action to take. Only one action at a time in this format:
Action:
```
$JSON_BLOB
```
Observation: the result of the action. This Observation is unique, complete, and the source of truth.
(this Thought/Action/Observation can repeat N times, you should take several steps when needed. The $JSON_BLOB must be formatted as markdown and only use a SINGLE action at a time.)
You must always end your output with the following format:
Thought: I now know the final answer
Final Answer: the final answer to the original input question
Now begin! Reminder to ALWAYS use the exact characters `Final Answer:` when you provide a definitive answer. """
# Hand-built Llama-3 chat prompt: the raw special tokens delimit the
# system / user / assistant turns instead of using a chat-template helper.
prompt=f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
{SYSTEM_PROMPT}
<|eot_id|><|start_header_id|>user<|end_header_id|>
What's the weather in London ?
<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""

# First generation pass: stop at "Observation:" so the model emits its
# Thought/Action JSON but does not hallucinate the tool result itself.
output = client.text_generation(
    prompt,
    max_new_tokens=200,
    stop=["Observation:"]
)
def get_weather(location):
    """Dummy weather tool standing in for a real API call.

    Whatever *location* is given, it reports the same canned sunny
    forecast, newline-terminated, so it can be appended to the prompt.
    """
    return "the weather in {} is sunny with low temperatures. \n".format(location)
# Append the model's Thought/Action output plus the (dummy) tool result as
# the Observation, then resume generation so the model can reach its
# "Final Answer:".
# NOTE(review): the tool call is hard-coded to 'London' instead of being
# parsed out of the model's action_input JSON — fine for a demo, but confirm
# that is the intent.
new_prompt=prompt+output+get_weather('London')
final_output = client.text_generation(
    new_prompt,
    max_new_tokens=200,
)
def llm():
    """Serve the agent answer computed once at import time.

    No generation happens per request; every click returns the same
    pre-computed ``final_output`` string.
    """
    return final_output
# Minimal Gradio UI: no inputs, a single text output showing the canned
# agent answer produced above.
app = gr.Interface(
    fn=llm,
    inputs=None,
    outputs="text",
    title="Simple Agent",
    description="A simple agent that completes the text.",
)
app.launch()