# Deliveroo restaurant menu scraper: collects restaurant page links for a
# given city/location, then exports each restaurant's menu (items, categories,
# modifier groups and options) to an Excel workbook.
| import json | |
| import requests | |
| from bs4 import BeautifulSoup | |
| import pandas as pd | |
def fetch_restaurant_links(city, location, limit=20):
    """Fetch restaurant page URLs for a Deliveroo city/location listing.

    Args:
        city: City slug used in the Deliveroo URL path (e.g. "dubai").
        location: Area slug within the city.
        limit: Maximum number of links to return. Defaults to 20,
            matching the previously hard-coded cap.

    Returns:
        A list of absolute restaurant URLs (at most ``limit``); an empty
        list when the request fails or no restaurants are found.
    """
    base_url = "https://deliveroo.ae"
    url = f"{base_url}/restaurants/{city}/{location}/?collection=restaurants"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Cookie': '__cf_bm=oakl46sJ3V9vwmnIIbfXWfkHbGmmC2pH56GyTI33b4U-1715931048-1.0.1.1-4XOcSGSThZV_INfpn3aptlo8jpZtLFbYoLsZxP9BpQ8LIjq3wBIe8CPlSf0AomuniXy4TZWyVlBQBTlrm.CPiSfI1jzx18y9zxwc9GX0fmo; roo_guid=c40617a7-76f7-432c-b780-f2653cd2edfe; roo_session_guid=2e989653-2776-4ede-a52e-b610f1ad64a2'
    }
    try:
        # Timeout prevents the script from hanging forever on a stalled connection.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f"Request failed: {exc}")
        return []
    if response.status_code != 200:
        # Any non-200 status lands here, not only timeouts — the old
        # "Response timed out." message was misleading.
        print(f"Request failed with status code {response.status_code}.")
        return []
    soup = BeautifulSoup(response.content, 'html.parser')
    if "We couldn't find" in soup.text or "No restaurants" in soup.text:
        print("No restaurants found for the specified location.")
        return []
    # Restaurant cards live in divs carrying these build-hashed class names;
    # these will need updating whenever Deliveroo redeploys its frontend.
    divs = soup.find_all('div', class_=["HomeFeedScrollTracker-bd9a6ffea8a4b4b7", "HomeFeedUICard-157f7be5d7b2fa7b"])
    hrefs = [a_tag['href'] for div in divs for a_tag in div.find_all('a', href=True)]
    hrefs = hrefs[:limit]
    return [f"{base_url}{href}" for href in hrefs]
def Excel_final(urls):
    """Scrape the menu of each restaurant URL and write it to an Excel workbook.

    One worksheet per successfully scraped restaurant is written to
    "restaurant_data.xlsx". Each URL is expected to be a Deliveroo menu
    page that embeds its data in a ``__NEXT_DATA__`` JSON script tag.

    Args:
        urls: Iterable of restaurant page URLs, as returned by
            ``fetch_restaurant_links``.
    """
    def fetch_restaurant_data(url):
        """Return a list of menu-item dicts for *url*, or None on any failure."""
        headers = {
            # User-Agent added for consistency with fetch_restaurant_links;
            # some servers reject requests that lack one.
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Cookie': '__cf_bm=_AOZtAiObnqBHPy4zhGRgBLW9xg9WiaDCRzg5E0sbMk-1715757967-1.0.1.1-xZNMBsnAqy_tfjUveujgfzT4Usw5ur4u7L0JlCcNXAQIC6Cq6wj46vPH7RLTh0Gq90JENxl7kbzjyOUFaBr8yCkmRGmt7APITEk0kkXzLTs; roo_guid=c40617a7-76f7-432c-b780-f2653cd2edfe; roo_session_guid=5846d6f0-5b7f-4598-8c6d-82b8023fd4fc'
        }
        try:
            # Timeout prevents a single dead URL from stalling the whole run.
            response = requests.get(url, headers=headers, timeout=30)
        except requests.RequestException:
            print(f"Failed to fetch the URL: {url}")
            return None
        if response.status_code != 200:
            print(f"Failed to fetch the URL: {url}")
            return None
        soup = BeautifulSoup(response.content, 'html.parser')
        script_tag = soup.find('script', id='__NEXT_DATA__')
        if not script_tag:
            print("Script tag not found")
            return None
        try:
            json_data = json.loads(script_tag.string)
            meta = json_data['props']['initialState']['menuPage']['menu']['meta']
            items = meta['items']
            categories = meta['categories']
            modifier_groups = meta['modifierGroups']
        except (json.JSONDecodeError, KeyError, TypeError):
            # Page structure changed or the payload is not the expected shape;
            # fail like the other error paths instead of raising.
            print("Unexpected page data structure")
            return None
        category_map = {category['id']: category['name'] for category in categories}
        modifier_groups_dict = {modifier_group['id']: modifier_group for modifier_group in modifier_groups}
        items_with_modifiers = []
        current_category = None
        current_category_position = 0
        item_position = 0  # re-initialised when the first known category is seen
        for item in items:
            category_id = item['categoryId']
            category_name = category_map.get(category_id, 'Unknown')
            if category_name == "Unknown":
                # Skip items whose category is absent from the menu metadata.
                continue
            if category_name != current_category:
                # New category: advance the category counter, restart item numbering.
                current_category = category_name
                current_category_position += 1
                item_position = 1
            else:
                item_position += 1
            items_with_modifiers.append({
                "id": item['id'],
                "category_id": category_id,
                "category_name": category_name,
                "category_position": current_category_position,
                "item_position": item_position,
                "name": item['name'],
                "description": item.get('description', ''),
                "price": item['price']['formatted'],
                "img_url": item.get('image').get('url', '') if item.get('image') else '',
                "modifier_groups": [modifier_groups_dict.get(modifier_group_id, {}) for modifier_group_id in item.get('modifierGroupIds', [])],
            })
        return items_with_modifiers

    def save_data_to_excel(data, sheet_name, writer):
        """Flatten *data* into rows (one per modifier group) and write a worksheet."""
        rows = []
        # The widest modifier group determines how many option columns we need.
        max_options = 0
        for item in data:
            for modifier_group in item['modifier_groups']:
                max_options = max(max_options, len(modifier_group.get('modifierOptions', [])))
        for item in data:
            base_row = [
                item['category_name'],
                item['category_position'],
                item['item_position'],
                item['name'],
                item['description'],
                item['price'],
                item['img_url'],
            ]
            first_modifier_group = True
            for modifier_group in item['modifier_groups']:
                modifier_group_row = base_row + [
                    modifier_group.get('name', ''),
                    modifier_group.get('minSelection', ''),
                    modifier_group.get('maxSelection', ''),
                ]
                options = modifier_group.get('modifierOptions', [])
                for option in options:
                    modifier_group_row += [
                        option.get('name', ''),
                        option['price']['formatted'] if option.get('price') else '',
                    ]
                # Pad so every row carries the same number of option columns.
                modifier_group_row += [''] * ((max_options - len(options)) * 2)
                if first_modifier_group:
                    rows.append(modifier_group_row)
                    first_modifier_group = False
                else:
                    # Follow-up rows for the same item leave the item columns blank.
                    rows.append([''] * len(base_row) + modifier_group_row[len(base_row):])
            if not item['modifier_groups']:
                # Items without modifiers still get one fully padded row.
                rows.append(base_row + [''] * (max_options * 2 + 3))
        columns = [
            'Category Name', 'Category Position', 'Item Position', 'Item Name', 'Description', 'Item Price', 'Image URL', 'Modifier Group Name', 'Min Selection', 'Max Selection'
        ]
        for i in range(1, max_options + 1):
            columns += [f'Option {i} Name', f'Option {i} Price']
        df = pd.DataFrame(rows, columns=columns)
        # Blank the per-option headers after 'Max Selection' (presentation choice
        # kept from the original output format). Assigning the column list
        # directly avoids rename-by-name, which misbehaves once names collide.
        if 'Max Selection' in df.columns:
            max_column_index = df.columns.get_loc('Max Selection')
            df.columns = list(df.columns[:max_column_index + 1]) + [''] * (len(df.columns) - max_column_index - 1)
        df.to_excel(writer, sheet_name=sheet_name, index=False)

    with pd.ExcelWriter("restaurant_data.xlsx", engine='xlsxwriter') as writer:
        for idx, url in enumerate(urls):
            data = fetch_restaurant_data(url)
            if data:
                save_data_to_excel(data, f'Sheet{idx+1}', writer)
    print("Data saved to restaurant_data.xlsx")
if __name__ == "__main__":
    # Strip stray whitespace so an accidental trailing space does not
    # produce a malformed URL slug.
    city = input("Enter the city: ").strip()
    location = input("Enter the location: ").strip()
    urls = fetch_restaurant_links(city, location)
    if urls:
        Excel_final(urls)
    else:
        print("No restaurant links found or unable to fetch data.")