Spaces:
Sleeping
Sleeping
Sagar Sanghani
committed on
Commit
·
b64c7e2
1
Parent(s):
81917a3
added HF model
Browse files
model.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from dotenv import load_dotenv, find_dotenv
|
| 2 |
+
import os
|
| 3 |
+
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
|
| 4 |
+
from langchain_community.tools import DuckDuckGoSearchRun
|
| 5 |
+
from langchain_community.document_loaders import AsyncHtmlLoader
|
| 6 |
+
from langchain.tools import tool
|
| 7 |
+
from langchain.prompts import ChatPromptTemplate
|
| 8 |
+
from langchain.agents import AgentExecutor, create_tool_calling_agent
|
| 9 |
+
from prompt import get_prompt
|
| 10 |
+
import re
|
| 11 |
+
|
| 12 |
+
class Model:
    """Web-research agent that answers a question with a short final string.

    Construction reads the Hugging Face token from the ``HF_TOKEN``
    environment variable, obtains the system prompt from ``get_prompt()`` and
    builds a tool-calling ``AgentExecutor`` equipped with a DuckDuckGo search
    tool and a web-page scraping tool.
    """

    # Matches the answer format requested by the system prompt,
    # FINAL_ANSWER:"<answer>". Whitespace after the colon is tolerated and
    # DOTALL lets an answer span several lines.
    _FINAL_ANSWER_RE = re.compile(r'FINAL_ANSWER:\s*"(.*?)"', re.DOTALL)

    def __init__(self):
        # The environment (e.g. a .env file) is expected to be loaded by the
        # caller before construction; main() does so via load_dotenv().
        self.token = os.getenv("HF_TOKEN")
        self.system_prompt = get_prompt()
        self.agent_executor = self.setup_model()

    # Kept as a plain static function (no ``self``) and wrapped with ``tool``
    # in setup_model(): decorating an instance method with ``@tool`` builds
    # the tool from the unbound function, so the agent could never call it
    # with just ``url``. The docstring is left short because ``tool`` uses it
    # as the tool description.
    @staticmethod
    def scrape_webpage(url: str) -> str:
        """Scrapes a given URL and returns the content."""
        loader = AsyncHtmlLoader(url)
        docs = loader.load()
        if not docs:
            # Give the agent something readable instead of an IndexError.
            return f"No content could be loaded from {url}"
        return docs[0].page_content

    def get_answer(self, question: str) -> str:
        """Run the agent on ``question`` and return the extracted final answer.

        Args:
            question: The natural-language question to answer.

        Returns:
            The text between the quotes of ``FINAL_ANSWER:"..."`` in the
            agent output, or the string ``"ERROR"`` when the agent raised or
            its output did not contain that pattern.
        """
        try:
            result = self.agent_executor.invoke({"input": question})
        except Exception as e:
            # Exception (not BaseException) so Ctrl-C / SystemExit still
            # propagate; any agent failure maps to the "ERROR" sentinel.
            print(f"An error occurred: {e}")
            return "ERROR"

        # The final answer is expected under the 'output' key of the result.
        output = result.get("output", "") if isinstance(result, dict) else result
        return self._extract_final_answer(output)

    @classmethod
    def _extract_final_answer(cls, text) -> str:
        """Pull the quoted value out of ``FINAL_ANSWER:"..."`` in ``text``.

        Returns ``"ERROR"`` when the pattern is absent (including empty or
        ``None`` input).
        """
        if text is None:
            text = ""
        elif not isinstance(text, str):
            text = str(text)

        match = cls._FINAL_ANSWER_RE.search(text)
        if match is None:
            print(f"ERROR: Pattern not found in output: {text!r}")
            return "ERROR"

        final_answer_value = match.group(1)
        print(f"The extracted FINAL_ANSWER is: {final_answer_value}")
        return final_answer_value

    def setup_model(self):
        """Build and return the ``AgentExecutor`` used by ``get_answer``.

        Wires a DuckDuckGo search tool and the web-page scraper into a
        tool-calling agent backed by the Qwen2.5-Coder-32B-Instruct endpoint.
        """
        # NOTE(review): the system prompt refers to this tool as `search`;
        # confirm that matches the name the tool registers under.
        search = DuckDuckGoSearchRun()
        scrape = tool(self.scrape_webpage)
        tools = [search, scrape]

        llm = HuggingFaceEndpoint(
            repo_id="Qwen/Qwen2.5-Coder-32B-Instruct",
            huggingfacehub_api_token=self.token,
            temperature=0.1,
        )

        chat = ChatHuggingFace(llm=llm).bind_tools(tools)

        # The scratchpad (tool calls and their results) must come AFTER the
        # human message so the conversation reads question -> tool activity.
        prompt = ChatPromptTemplate.from_messages(
            [
                ("system", self.system_prompt),
                ("human", "{input}"),
                ("placeholder", "{agent_scratchpad}"),
            ]
        )

        agent = create_tool_calling_agent(chat, tools, prompt)

        return AgentExecutor(
            agent=agent,
            tools=tools,
            verbose=True,
            handle_parsing_errors=True,
        )
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def main():
    """Ask the agent one sample question and print the answer it returns."""
    # Load the .env file first: Model.__init__ reads HF_TOKEN from the
    # process environment.
    load_dotenv(find_dotenv())

    question = "In what directory could the public find the free Penguin Project dating software for Mac as of 1997?"
    agent = Model()
    answer = agent.get_answer(question)
    print(f"the output is: {answer}")


if __name__ == "__main__":
    main()
|
prompt.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def get_prompt():
    """Return the system prompt text for the research agent.

    The prompt lists the available tools, requires the reply to end with
    FINAL_ANSWER:"<answer>", and walks through two worked examples.
    """
    # The text deliberately contains no curly braces; keep it that way if it
    # is fed to a prompt template that treats braces as variables.
    return """
You are a general AI assistant designed to solve complex, multi-step questions. You have access to the following tools:

1. **search**: A search engine. Use this for general queries, finding current events, or looking up broad information.
- Example: `search("population of Tokyo")`
2. **scrape_webpage**: Scrapes a given URL to return its content. Use this after a search to get detailed information from a specific website.
- Example: `scrape_webpage("https://example.com/article")`

Your final answer must be a concise string following the format: `FINAL_ANSWER:"<your final answer>"`.

**Thought Process and Tool Usage (ReAct Pattern):**

The process is a sequence of Thought, Action, Action Input, and Observation. You must follow this pattern exactly.

**Thought**: You must always begin with a thought. Describe your reasoning and the next step you will take.
**Action**: Choose a tool from the list above. The action must be one of the tool names provided.
**Action Input**: Provide the input for the selected tool.
**Observation**: This is the result from the tool. You will be provided with this information.

After an observation, you can generate another Thought and continue the cycle until you have enough information to provide the FINAL_ANSWER.

**Example 1:**
Question: "What is the capital of France?"
Thought: The user is asking for a general fact about a city. I can use the search tool to find this information.
Action: search
Action Input: capital of France
Observation: The capital of France is Paris.
Thought: I have the information needed to answer the question.
FINAL_ANSWER:"Paris"

**Example 2:**
Question: "What is the first season of the show that Michael Schur discussed in a 2022 TED interview about every episode ending on an unpleasant note? What was the opening stock price on February 1, 1980 of the railroad that owned the tracks behind the building used for exterior shots of the show's main setting after season one?"
Thought: This is a complex, multi-step question. I need to break it down.
1. First, I need to identify the show Michael Schur is talking about. I'll use a search query to find the 2022 TED interview and the show he describes.
2. Second, I'll identify the building used for exterior shots of the main setting after season one.
3. Third, I'll find the railroad that owned the tracks behind that building.
4. Finally, I'll search for the opening stock price of that railroad on February 1, 1980, using Yahoo Finance or a similar source.

Action: search
Action Input: Michael Schur 2022 TED interview unpleasant ending show
Observation: ... (The result of the search will be provided here, likely mentioning "The Office" and its first season)
Thought: ... (Continue the cycle based on the observation)
FINAL_ANSWER:"The first season of the Office"

"""
|