# NOTE: Hugging Face Spaces status captured with this file: "Runtime error" (x2).
# --- Imports and data loading -------------------------------------------------
# Standard library
import datetime

# Third-party
import pandas as pd
import requests
from pydantic import BaseModel, Field

# LangChain
from langchain.tools import tool
from langchain.prompts import MessagesPlaceholder
from langchain.chat_models import ChatOpenAI
from langchain.agents.agent_types import AgentType
from langchain_experimental.agents import create_pandas_dataframe_agent

# Fund/financials dataset consumed by `dataframeagent` below.
dataf = pd.read_csv("HW 1 newest version.csv")
# Dataframe-search tool exposed to the agent.
def dataframeagent(value: str, df=None) -> str:
    """
    Render the fund dataframe and report highest/lowest rows for the known
    numeric columns.

    Parameters:
        value (str): The value the caller searched for. It is echoed in the
            output header; per-row filtering is currently disabled (the
            original filter was commented out upstream).
        df (pd.DataFrame, optional): Dataframe to inspect. Defaults to the
            module-level ``dataf`` loaded from the CSV.

    Returns:
        str: The dataframe rendering plus highest/lowest rows per column,
        or per-column error notes when a column cannot be processed.
    """
    if df is None:
        df = dataf
    # Columns for finding highest and lowest values.
    columns_to_check = [
        'Profit Margin', 'Operating Margin (ttm)', 'Return on Assets (ttm)',
        'Return on Equity (ttm)', 'Revenue (ttm)', 'Revenue Per Share (ttm)',
    ]
    # BUGFIX: the original converted columns of the shared global dataframe in
    # place, so a second call applied `.str.replace` to already-numeric
    # columns and failed. Working on a local copy keeps the tool idempotent.
    work = df.copy()
    result = [f"Search Results for '{value}':\n{work.to_string(index=False)}\n"]
    for column in columns_to_check:
        try:
            series = work[column]
            # Strip '%' and 'M' (millions) markers before numeric conversion;
            # columns that are already numeric are passed through unchanged.
            if series.dtype == object:
                series = series.str.replace('%', '', regex=False).str.replace('M', '', regex=False)
            numeric = pd.to_numeric(series, errors='coerce')
            highest_row = work.loc[numeric.idxmax()]
            lowest_row = work.loc[numeric.idxmin()]
            result.append(f"Highest {column}:\n{highest_row.to_string()}\n")
            result.append(f"Lowest {column}:\n{lowest_row.to_string()}\n")
        except Exception as e:
            result.append(f"Error processing column {column}: {str(e)}\n")
    return "\n".join(result)
import json
from pathlib import Path
import pandas as pd

# --- Convert the QA summary CSV to JSON and keep an in-memory copy ------------
example_filepath = "QA_summary_zh.csv"
# Read the CSV file (UTF-8: the content is Traditional Chinese).
csv_data = pd.read_csv(example_filepath, encoding="utf-8")
# Convert to a JSON records string; force_ascii=False keeps CJK text readable.
json_data = csv_data.to_json(orient='records', force_ascii=False)
# Save the JSON next to the CSV. (A dead bare `json_data` expression left over
# from the original notebook cell has been removed.)
json_file_path = "QA_summary_zh.json"
Path(json_file_path).write_text(json_data, encoding='utf-8')
# BUGFIX: read back with an explicit encoding — the original used the platform
# default, which breaks on Windows for CJK content.
data = json.loads(Path(json_file_path).read_text(encoding='utf-8'))
from langchain.document_loaders import JSONLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# --- Load the QA JSON into LangChain documents --------------------------------
file_path = 'QA_summary_zh.json'
# jq schema: pull the Question/Answer/description fields out of each record.
jq_schema = '.[] | {Question: .Question , Answer: .Answer , description: .description }'
loader = JSONLoader(
    file_path=file_path,
    jq_schema=jq_schema,
    text_content=False,
)
docs = loader.load()
print(docs)
# No chunking is applied: each JSON record becomes one "split" as-is.
all_splits = docs
import json
from pathlib import Path
import pandas as pd
import os

from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

# SECURITY FIX: the original source hard-coded a live OpenAI API key here.
# That key is compromised and must be revoked immediately. The key is now
# taken from the environment (e.g. a Spaces secret / `export OPENAI_API_KEY=...`).
if "OPENAI_API_KEY" not in os.environ:
    raise RuntimeError("Set the OPENAI_API_KEY environment variable before running.")

# Embed the QA documents into an in-memory Chroma vector store used by FAQ().
vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings())
| # Import necessary modules | |
| from langchain import hub | |
| from langchain.prompts import PromptTemplate | |
| from langchain.schema import StrOutputParser | |
| from langchain.chains import ConversationChain | |
| from langchain.memory import ConversationBufferMemory | |
| from langchain.chat_models import ChatOpenAI | |
| from langchain.schema import HumanMessage | |
| from langchain_core.runnables import RunnablePassthrough, RunnableLambda | |
def FAQ(question: str) -> str:
    """Processes a question, retrieves relevant context, and generates a response."""
    # Prompt skeleton: Traditional-Chinese assistant grounded on retrieved context.
    template = """
    您是一個繁體中文的助理,以下是從知識庫中檢索到的相關內容,請根據它們回答用戶的問題。
    內容: {context}
    問題: {question}
    """
    chat = ChatOpenAI(temperature=0.0)
    # Pull the single closest document from the module-level vector store.
    retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 1})
    hits = retriever.invoke(question)
    context_text = "\n\n".join(doc.page_content for doc in hits)
    # NOTE(review): a fresh ConversationBufferMemory is created on every call,
    # so no conversation history actually carries across invocations — confirm
    # this is intended.
    chain = ConversationChain(
        llm=chat,
        memory=ConversationBufferMemory(),
        verbose=True,
    )
    # Render the prompt with the retrieved context and run it through the chain.
    rendered = template.format(context=context_text, question=question)
    return chain.predict(input=rendered)
import requests
from bs4 import BeautifulSoup
import random

# Pool of browser User-Agent strings; gresb() picks one at random per request
# so the scraper mimics a variety of clients.
user_agents = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.3 Safari/605.1.15",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.101 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0",
    "Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15E148 Safari/604.1",
]
def gresb(query: str) -> str:
    """Processes a question, retrieves relevant context, and generates a response.

    Searches the GRESB website for `query` and, on success, returns a string
    containing:
    1. article_text — the extracted body of the first search hit
    2. article_url  — the URL of that hit
    On any failure a human-readable status message is returned instead.
    """
    base_url = "https://www.gresb.com/nl-en?s="
    search_url = f"{base_url}{query.replace(' ', '+')}"
    # Select a random User-Agent header to mimic different browsers.
    headers = {
        "User-Agent": random.choice(user_agents)
    }
    # BUGFIX: timeouts added — the original requests.get calls had none, so a
    # stalled connection would hang the whole agent loop indefinitely.
    response = requests.get(search_url, headers=headers, timeout=15)
    if response.status_code == 200:
        # Parse the HTML content of the search results page.
        soup = BeautifulSoup(response.content, 'html.parser')
        # Selector tied to GRESB's current markup; adjust if the site changes.
        results = soup.find_all('a', class_='overlay-link z-index-1')
        if results:
            # Follow the first result's link.
            article_url = results[0]['href']
            article_response = requests.get(article_url, headers=headers, timeout=15)
            if article_response.status_code == 200:
                # Extract the article text and return it with the URL.
                article_text = extract_article_text(article_response.content)
                return f"Article Text: {article_text}\n\nArticle URL: {article_url}"
            else:
                return f"Failed to retrieve the article page. Status code: {article_response.status_code}"
        else:
            return "No search results found."
    else:
        return f"Failed to retrieve search results. Status code: {response.status_code}"
def extract_article_text(html_content):
    """Pull the readable article body out of a GRESB article page."""
    parsed = BeautifulSoup(html_content, 'html.parser')
    # GRESB articles render their body inside a div.wysiwyg container.
    body = parsed.find('div', class_='wysiwyg')
    if body is None:
        return "Article content not found in the provided structure."
    # Collect the text-bearing tags in document order and join their text.
    pieces = body.find_all(['p', 'ul', 'blockquote', 'h2', 'h4'])
    return ' '.join(piece.get_text() for piece in pieces).strip()
| # Example usage | |
| #query = "london office" | |
| #article_text = search_and_extract_gresb(query) | |
| #print(article_text) # This will print the extracted article content or any status messages | |
import os
import openai

# SECURITY FIX: a live OpenAI API key was hard-coded here in the original
# source; it is compromised and must be revoked. Read it from the environment
# (e.g. a Spaces secret) instead of committing it to source control.
if "OPENAI_API_KEY" not in os.environ:
    raise RuntimeError("Set the OPENAI_API_KEY environment variable before running.")
openai.api_key = os.environ['OPENAI_API_KEY']

# BUGFIX: gresb/dataframeagent/FAQ are plain functions, but
# format_tool_to_openai_function and AgentExecutor expect BaseTool instances —
# wrap them with the `tool` decorator (imported at the top of the file).
tools = [tool(gresb), tool(dataframeagent), tool(FAQ)]

from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.tools.render import format_tool_to_openai_function
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser

# Expose each tool as an OpenAI function schema and bind them to the model.
functions = [format_tool_to_openai_function(f) for f in tools]
model = ChatOpenAI(temperature=0).bind(functions=functions)
def run_agent(user_input):
    """Drive the agent loop manually: invoke the chain, dispatch tool calls,
    and feed observations back until the agent finishes or a cap is reached.

    Parameters:
        user_input: The user's query string.

    Returns:
        The agent's final return values (dict) on success, or a Chinese error
        string (matching the UI language) when the iteration cap is exceeded.
    """
    intermediate_steps = []  # (action, observation) pairs fed back each turn
    max_iterations = 20      # hard cap to avoid an infinite tool loop
    iteration_count = 0
    while iteration_count < max_iterations:
        iteration_count += 1
        # Invoke the processing chain with the user input and the accumulated
        # intermediate steps (empty list on the first turn).
        result = agent_chain.invoke({
            "input": user_input,
            "intermediate_steps": intermediate_steps,
        })
        # AgentFinish means the model produced a final answer.
        if isinstance(result, AgentFinish):
            return result.return_values
        # Safe to inspect the message log only for non-finish results.
        print(result.message_log)
        # Map the requested tool name to the matching callable.
        tool_fn = {
            "gresb": gresb,
            "dataframeagent": dataframeagent,
            "FAQ": FAQ,
        }.get(result.tool)
        if tool_fn:
            # BUGFIX: the originals mapped here are plain functions, which
            # have no `.run` attribute — call them directly, unpacking dict
            # tool inputs into keyword arguments.
            if isinstance(result.tool_input, dict):
                observation = tool_fn(**result.tool_input)
            else:
                observation = tool_fn(result.tool_input)
            intermediate_steps.append((result, observation))
        else:
            print(f"未找到合適的工具: {result.tool}")
            break
    # Iteration cap exceeded: report failure to the user.
    return "無法完成任務,請稍後再試。"
from langchain.prompts import MessagesPlaceholder, ChatPromptTemplate

# Chat prompt for the tool-routing agent:
#   1. a system message describing when to pick each of the three tools,
#   2. prior conversation (chat_history, filled from ConversationBufferMemory),
#   3. the current user input,
#   4. the agent scratchpad (function-call / observation trace).
# NOTE(review): "GRESB is not a foud." in the system text looks like a typo
# for "fund" — confirm before changing, as it is part of the live prompt.
prompt = ChatPromptTemplate.from_messages([
    ("system",
     """You are a helpful assistant. There are three tools to use based on different scenarios.
1. gresb Tool:
Usage Scenario: Use this tool when you need to search for fund information related to a specific area, city, or keyword on the GRESB website. It is ideal for searching fund details in specific locations such as "London office" or "Paris commercial real estate."
2. dataframeagent Tool:
Usage Scenario: This dataframe contains 'Fund Name', 'Region', 'Ticker','Profit Margin', 'Operating Margin (ttm)', 'Return on Assets (ttm)', 'Return on Equity (ttm)',
'Revenue (ttm)', and 'Revenue Per Share (ttm)', choose one to search in the dataframe
You have access to the following note: GRESB is not a foud.
3. FAQ Tool
Usage Scenario: use this tool to search for 綠建築標章申請審核認可及使用作業要點.
example:「綠建築標章申請審核認可及使用作業要點」規定,修正重點為何?
example:109年7月1日起申請綠建築標章評定有何改變?
"""),
    MessagesPlaceholder(variable_name="chat_history"),
    ("user", "{input}"),
    MessagesPlaceholder(variable_name="agent_scratchpad")
])
from langchain.agents.format_scratchpad import format_to_openai_functions
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.agent import AgentFinish

# Assemble the agent pipeline: inject the formatted scratchpad, render the
# prompt, call the function-bound model, then parse its function-call output.
agent_chain = (
    RunnablePassthrough.assign(
        agent_scratchpad=lambda x: format_to_openai_functions(x["intermediate_steps"])
    )
    | prompt
    | model
    | OpenAIFunctionsAgentOutputParser()
)

from langchain.memory import ConversationBufferMemory

# Buffer memory backing the `chat_history` placeholder in the prompt.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

from langchain.agents import AgentExecutor

agent_executor = AgentExecutor(agent=agent_chain, tools=tools, verbose=True, memory=memory)
import gradio as gr

def process_input(user_input):
    """Run one query through the agent executor and return its text output."""
    # NOTE(review): history is wiped per request, so every query starts a
    # fresh conversation — confirm this is intended for a shared UI.
    memory.clear()
    outcome = agent_executor.invoke({"input": user_input})
    # Extract the AI message content from the executor result, if present.
    return outcome['output'] if 'output' in outcome else "No output found."

# Build the Gradio interface.
iface = gr.Interface(
    fn=process_input,
    inputs="text",
    outputs="text",
    title="TABC",
    description="The chatbot contains: Extracting YahooFinancial data, Scraping GRESB Website, and Retrieving 綠建築申請資料"
)

# Launch the interface.
iface.launch()