Spaces:
Sleeping
Sleeping
| # task = task_generation(sitemap) | |
| from openai import OpenAI | |
| from datasets import load_dataset | |
| import json_repair | |
class DataPopulation:
    """Pre-populate page ``user_data`` and user state for a task using GPT-4.

    The instance keeps one running chat history (``self.conversation``) that
    every prompt-building method appends to, so later prompts can rely on
    earlier answers being present in the history.
    """

    def __init__(self, api_key):
        """Create the OpenAI client and seed the conversation.

        Args:
            api_key: API key handed to the ``OpenAI`` client constructor.
        """
        self.client = OpenAI(api_key=api_key)
        # Chat history shared by all requests made through this instance.
        self.conversation = [
            {
                "role": "system",
                "content": (
                    "You are an intelligent assistant specialized in web page management tasks. "
                    "Your responsibilities include identifying relevant pages, updating page details, user data, and the sitemap as required."
                ),
            }
        ]

    def fetch_huggingface_dataset(self, dataset_name):
        """Fetch the dataset from Hugging Face.

        Args:
            dataset_name: Identifier passed straight to ``load_dataset``.

        Returns:
            Whatever ``load_dataset(dataset_name)`` returns.
        """
        return load_dataset(dataset_name)

    def gpt4_chat(self, conversation):
        """Send a chat request to GPT-4 and return the stripped reply text.

        Args:
            conversation: List of chat messages sent as ``messages``.

        Returns:
            The first choice's message content with surrounding whitespace
            removed; an empty string when the API returns no content.
        """
        response = self.client.chat.completions.create(
            model="gpt-4",
            messages=conversation,
            max_tokens=1000,  # Adjusted max_tokens if needed
            temperature=0.7,
        )
        # The content field may be None; avoid calling .strip() on it.
        content = response.choices[0].message.content
        return (content or "").strip()

    def ask_for_relevant_pages(self, task, sitemap):
        """Ask the model for a plan and the sitemap pages relevant to the task.

        Appends the request to ``self.conversation`` before sending it.

        Args:
            task: Task description interpolated into the prompt.
            sitemap: Sitemap object interpolated into the prompt.

        Returns:
            The raw model reply (plan text followed by a ``PAGES:`` section,
            if the model follows the requested format).
        """
        # NOTE(review): the example line below is a plain (non-f) string, so
        # '{{' and '}}' are sent to the model literally as doubled braces --
        # confirm whether single braces were intended.
        self.conversation.append({
            "role": "user",
            "content": (
                f"Given the task: '{task}' and the sitemap:\n{sitemap}\n\n"
                "Respond first with a brief 'Plan' which suggests what data we have to pre-populate the sitemap "
                "to make task accomplishable. Then identify the page(s) these data going to be stored on. "
                "Return the page names exactly as they appear in the sitemap, in JSON format. "
                "For each relevant page, provide a brief explanation of its relevance. "
                "Example response:\nPlanning sentences. PAGES: {{\n 'Ride History': 'Displays previous ride data needed for the task.'\n}}"
            ),
        })
        return self.gpt4_chat(self.conversation)

    def _update_user_data(self, task, relevant_page_details, relevant_pages):
        """Ask the model to fill in ``user_data`` for the relevant pages.

        Appends the request to ``self.conversation`` before sending it.

        Args:
            task: Task description interpolated into the prompt.
            relevant_page_details: Page details of the relevant pages.
            relevant_pages: Mapping of page name to the model's stated reason.

        Returns:
            The raw model reply.
        """
        self.conversation.append({
            "role": "user",
            "content": (
                f"Given the task: '{task}' and the following task-relevant page details:\n{relevant_page_details}\n\n"
                f"Here is reason behind each relevant page: {relevant_pages}. "
                "Update each page's 'user_data' value with essential information for task-completion. "
                "For example, if a task ask us to retrieve previous order, then we will need to populate synthetic order history in user_data. "
                "Ensure output maintain the exact format and structure as input page details."
            ),
        })
        return self.gpt4_chat(self.conversation)

    def ask_to_update_user_state(self, task, user_state):
        """Ask the model to initialise the user state for the task.

        Appends the request to ``self.conversation`` before sending it.

        Args:
            task: Task description interpolated into the prompt.
            user_state: Default user state interpolated into the prompt.

        Returns:
            The model reply parsed with ``json_repair.loads``.
        """
        self.conversation.append({
            "role": "user",
            "content": (
                f"Given the task: '{task}', default user state:\n{user_state}, and user_data in chat history.\n\n"
                "Initialize the user state values to reflect any initial status necessary for completing the task. "
                "Ensure output maintain the exact format and structure as input page details."
            ),
        })
        response_content = self.gpt4_chat(self.conversation)
        return json_repair.loads(response_content)

    @staticmethod
    def extract_uid_from_sitemap(sitemap, relevant_pages):
        """Extract UIDs for the relevant pages from the sitemap.

        Declared as a static method so that both
        ``DataPopulation.extract_uid_from_sitemap(sitemap, pages)`` and
        ``self.extract_uid_from_sitemap(sitemap, pages)`` work; without it the
        instance-style call passes ``self`` as an extra positional argument.

        Args:
            sitemap: Mapping read as ``sitemap['pages'][page]['uid']``.
            relevant_pages: Iterable of page names to look up.

        Returns:
            List of UIDs in the order of ``relevant_pages``; names that cannot
            be resolved are reported on stdout and skipped.
        """
        uid = []
        for page in relevant_pages:
            try:
                uid.append(sitemap['pages'][page]['uid'])
            except KeyError:
                print(f"Page name '{page}' not found in the sitemap.")
        return uid

    @staticmethod
    def _parse_mapping(text, label):
        """Parse a model reply with json_repair and require a JSON object.

        Args:
            text: Reply text to parse.
            label: Short description used in the error message.

        Returns:
            The parsed ``dict``.

        Raises:
            ValueError: If the parsed reply is not a ``dict``.
        """
        parsed = json_repair.loads(text)
        if not isinstance(parsed, dict):
            raise ValueError(
                f"Expected a JSON object for {label}, got: {text!r}"
            )
        return parsed

    def process_data(self, task, hugging_face_url):
        """Process the task with the given dataset.

        Loads the dataset, asks the model which pages matter, fills in their
        ``user_data`` and finally asks for an updated user state.

        Args:
            task: Task description.
            hugging_face_url: Dataset identifier passed to
                ``fetch_huggingface_dataset``.

        Returns:
            Tuple ``(sitemap, page_details, updated_user_state)``;
            ``page_details`` is updated in place with the generated user data.

        Raises:
            ValueError: If a model reply that must be a JSON object is not one.
        """
        dataset = self.fetch_huggingface_dataset(hugging_face_url)
        rows = dataset['train']

        # Rows 0, 1 and 2 hold the sitemap, page details and user state.
        # SECURITY: eval() executes whatever expression the dataset stores.
        # Only use datasets you trust; if the values are plain literals,
        # ast.literal_eval would be the safe replacement.
        sitemap = eval(rows[0]['value'])
        page_details = eval(rows[1]['value'])
        user_state = eval(rows[2]['value'])

        # Step 1: Identify relevant pages. Keep only the text after the
        # "PAGES:" marker; if the model omitted the marker, fall back to the
        # whole reply instead of failing with an IndexError.
        pages_reply = self.ask_for_relevant_pages(task, sitemap)
        _, marker, tail = pages_reply.partition("PAGES:")
        pages_json = (tail if marker else pages_reply).strip()
        self.conversation.append({"role": "assistant", "content": pages_json})
        relevant_pages = self._parse_mapping(pages_json, "relevant pages")
        target_page_names = relevant_pages.keys()

        # Step 2: Extract UIDs for the relevant pages
        page_uid = self.extract_uid_from_sitemap(sitemap, target_page_names)

        # Step 3: Retrieve page details using the UIDs
        relevant_page_details = {
            uid: page_details[uid] for uid in page_uid if uid in page_details
        }

        # Step 4: Populate user data for the task (only for relevant pages)
        user_data_reply = self._update_user_data(
            task, relevant_page_details, relevant_pages
        )
        self.conversation.append({"role": "assistant", "content": user_data_reply})
        updated_user_data = self._parse_mapping(user_data_reply, "updated user data")
        for uid, page_data in updated_user_data.items():
            try:
                page_details[uid]['user_data'] = page_data['user_data']
            except (KeyError, TypeError):
                # Best effort: skip entries for unknown UIDs or entries that
                # do not carry a 'user_data' field.
                continue

        # Step 5: Update user state
        updated_user_state = self.ask_to_update_user_state(task, user_state)

        # Return the updated structures
        return sitemap, page_details, updated_user_state