{ "cells": [ { "cell_type": "code", "id": "a49a961ef9dafc8b", "metadata": { "ExecuteTime": { "end_time": "2025-06-10T15:26:54.606280Z", "start_time": "2025-06-10T15:26:54.499875Z" } }, "source": "from langchain_community.document_loaders import UnstructuredExcelLoader", "outputs": [], "execution_count": 3 }, { "metadata": { "ExecuteTime": { "end_time": "2025-06-10T15:26:54.806898Z", "start_time": "2025-06-10T15:26:54.804638Z" } }, "cell_type": "code", "source": "file_dir = 'files/7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx'", "id": "7f1454e97563e93", "outputs": [], "execution_count": 4 }, { "metadata": { "ExecuteTime": { "end_time": "2025-06-10T15:29:12.643058Z", "start_time": "2025-06-10T15:29:12.625239Z" } }, "cell_type": "code", "source": [ "\n", "loader = UnstructuredExcelLoader(file_dir, mode=\"elements\")\n", "docs = loader.load()\n", "\n", "print(len(docs))\n", "\n", "# docs" ], "id": "initial_id", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1\n" ] } ], "execution_count": 7 }, { "metadata": { "ExecuteTime": { "end_time": "2025-06-10T15:32:40.555855Z", "start_time": "2025-06-10T15:32:40.553019Z" } }, "cell_type": "code", "source": "print(docs[0].metadata['text_as_html'])", "id": "6c69b91ca45b0039", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "
| Location | Burgers | Hot Dogs | Salads | Fries | Ice Cream | Soda |
| Pinebrook | 1594 | 1999 | 2002 | 2005 | 1977 | 1980 |
| Wharvton | 1983 | 2008 | 2014 | 2015 | 2017 | 2018 |
| Sagrada | 2019 | 2022 | 2022 | 2023 | 2021 | 2019 |
| Algrimand | 1958 | 1971 | 1982 | 1989 | 1998 | 2009 |
| Marztep | 2015 | 2016 | 2018 | 2019 | 2021 | 2022 |
| San Cecelia | 2011 | 2010 | 2012 | 2013 | 2015 | 2016 |
| Pimento | 2017 | 1999 | 2001 | 2003 | 1969 | 2967 |
| Tinseles | 1967 | 1969 | 1982 | 1994 | 2005 | 2006 |
| Rosdale | 2007 | 2009 | 2021 | 1989 | 2005 | 2011 |