rohanshaw committed on
Commit
2b11763
·
verified ·
1 Parent(s): 2e50007

Upload 8 files

Browse files
Files changed (7) hide show
  1. Dockerfile +12 -12
  2. chatbot.py +2 -2
  3. chatbotmemory.py +91 -0
  4. dataset.txt +66 -75
  5. requirements.txt +11 -11
  6. scrapWebpage.py +39 -0
  7. test.ipynb +791 -0
Dockerfile CHANGED
@@ -1,13 +1,13 @@
1
- FROM python:3.11-slim
2
-
3
- WORKDIR /
4
-
5
- COPY ./requirements.txt .
6
-
7
- RUN pip install --no-cache-dir --upgrade -r requirements.txt
8
-
9
- COPY . .
10
-
11
- EXPOSE 7860
12
-
13
  CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
1
+ FROM python:latest
2
+
3
+ WORKDIR /
4
+
5
+ COPY ./requirements.txt .
6
+
7
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
8
+
9
+ COPY . .
10
+
11
+ EXPOSE 7860
12
+
13
  CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
chatbot.py CHANGED
@@ -36,10 +36,10 @@ class Chatbot():
36
  docsearch = PineconeVectorStore.from_existing_index(index_name, embeddings)
37
 
38
 
39
- llm = GoogleGenerativeAI(model="gemini-1.5-flash-latest", google_api_key=os.getenv("GEMINI_API_KEY"))
40
 
41
  template = """
42
- INSTRUCTION: Act as Personal Assistant chatbot of The Hexatech, an IT Startup that provides core hexa services: design, deployment, web development, ai & ml, software. this is conversation \
43
  to a user who wants to get his query solved about The Hexatech. Use the CONTEXT to answer in a helpful manner to the QUESTION. \
44
  If you don't know any ANSWER, say you don't know \
45
  Always follow general guardrails before generating any response. \
 
36
  docsearch = PineconeVectorStore.from_existing_index(index_name, embeddings)
37
 
38
 
39
+ llm = GoogleGenerativeAI(model="Gemini 2.5 Flash-Lite Preview 06-17", google_api_key=os.getenv("GEMINI_API_KEY"))
40
 
41
  template = """
42
+ INSTRUCTION: Act as Customer Support chatbot of The Hexatech, an IT Startup that provides Daas \
43
  to a user who wants to get his query solved about The Hexatech. Use the CONTEXT to answer in a helpful manner to the QUESTION. \
44
  If you don't know any ANSWER, say you don't know \
45
  Always follow general guardrails before generating any response. \
chatbotmemory.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
2
+ from langchain_community.document_loaders import TextLoader
3
+ from langchain.schema.runnable import RunnablePassthrough
4
+ from langchain.schema.output_parser import StrOutputParser
5
+ from langchain_pinecone import PineconeVectorStore
6
+ from langchain.prompts import MessagesPlaceholder, ChatPromptTemplate
7
+ from langchain_google_genai import GoogleGenerativeAI, GoogleGenerativeAIEmbeddings
8
+ from dotenv import load_dotenv, find_dotenv
9
+ import os
10
+ from pinecone import Pinecone, PodSpec
11
+
12
+ load_dotenv(find_dotenv())
13
+
14
+ class ChatbotMemory():
15
+
16
+ loader = TextLoader('dataset.txt', autodetect_encoding=True)
17
+ documents = loader.load()
18
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=103)
19
+ docs = text_splitter.split_documents(documents)
20
+
21
+ embeddings = GoogleGenerativeAIEmbeddings(
22
+ model="models/embedding-001", task_type="retrieval_query", google_api_key=os.getenv("GEMINI_API_KEY"))
23
+
24
+ pinecone = Pinecone(
25
+ api_key=os.environ.get("PINECONE_API_KEY")
26
+ )
27
+
28
+ index_name = "chatbot"
29
+
30
+ if index_name not in pinecone.list_indexes().names():
31
+ pinecone.create_index(name=index_name, metric="cosine", dimension=768, spec=PodSpec(environment="gcp-starter"))
32
+ docsearch = PineconeVectorStore.from_documents(docs, embeddings, index_name=index_name)
33
+ else:
34
+ docsearch = PineconeVectorStore.from_existing_index(index_name, embeddings)
35
+
36
+
37
+ llm = GoogleGenerativeAI(model="gemini-pro", google_api_key=os.getenv("GEMINI_API_KEY"))
38
+
39
+ def contextualized_question(input: dict):
40
+ if input.get("chat_history"):
41
+ llm = GoogleGenerativeAI(model="gemini-pro", google_api_key=os.getenv("GEMINI_API_KEY"))
42
+
43
+ contextualize_q_system_prompt = """Given a chat history and the latest user question \
44
+ which might reference context in the chat history, formulate a standalone question \
45
+ which can be understood without the chat history. Do NOT answer the question, \
46
+ just reformulate it if needed and otherwise return it as is."""
47
+
48
+ contextualize_q_prompt = ChatPromptTemplate.from_messages(
49
+ [
50
+ ("system", contextualize_q_system_prompt),
51
+ MessagesPlaceholder(variable_name="chat_history"),
52
+ ("human", "{question}"),
53
+ ]
54
+ )
55
+
56
+ contextualize_q_chain = contextualize_q_prompt | llm | StrOutputParser()
57
+ return contextualize_q_chain
58
+ else:
59
+ return input["question"]
60
+
61
+ template = """
62
+ INSTRUCTION: Act as PRAN, Personal Assistant of Rohan Shaw, this is conversation \
63
+ to a user who wants to get his query solved about Rohan Shaw, his professional and personal life. Use the CONTEXT to answer in a helpful manner to the QUESTION. \
64
+ If you don't know any ANSWER, say you don't know \
65
+ Always follow general guardrails before generating any response. \
66
+ Always try to keep the conversation in context to Rohan Shaw. Keep your replies short \
67
+ compassionate and informative.\
68
+ Give the answer from the CONTEXT\
69
+ You should help user to get his query solved and also try to increase engagement and promoting Rohan Shaw.\
70
+
71
+ CONTEXT: {context}
72
+ QUESTION: {question}
73
+ ANSWER:
74
+ """
75
+
76
+ prompt = ChatPromptTemplate.from_messages(
77
+ [
78
+ ("system", template),
79
+ MessagesPlaceholder(variable_name="chat_history"),
80
+ ("human", "{question}"),
81
+ ]
82
+ )
83
+
84
+
85
+ rag_chain = (
86
+ RunnablePassthrough.assign(
87
+ context=contextualized_question | docsearch.as_retriever()
88
+ )
89
+ | prompt
90
+ | llm
91
+ )
dataset.txt CHANGED
@@ -1,91 +1,74 @@
1
- Design. Develop. Dominate
2
- Hexa Core Services For Your Business.
3
- we build success stories for you, get your business a lucky touch
4
 
5
- previous clients:
6
- Viridiv
7
- Healscure
8
- CodeLens
9
  Viridiv
10
  Healscure
11
  CodeLens
12
 
13
- Be the Achievers of Achievers
14
- Unlock the full potential of your business with The Hexatech, your all-in-one partner for A-Z business solutions. From stunning designs to powerful development, strategic marketing, seamless deployment, and expert SEO—we’re here to turn your vision into reality.
15
- Lets build your success story together.
16
-
17
- What we offer
18
- At The Hexatech, we provide end-to-end business solutions tailored to meet your unique goals. From building cutting-edge websites to harnessing the power of AI, we’ve got you covered. Our expert team ensures that your ideas turn into reality with seamless deployment, stunning designs, robust databases, and custom software solutions. Let us take your business to the next level with innovative technology and creative strategies.
19
- Web Development
20
- We create high-performance, responsive websites that are not only visually stunning but also optimized for functionality. Whether it's a corporate site or an e-commerce platform, we bring your online presence to life.
21
- AI & Machine Learning
22
- Unlock the future with AI and machine learning solutions that help you automate processes, analyze data, and make smarter decisions. From predictive analytics to personalized customer experiences, we’ll help you stay ahead of the curve.
23
- Deployment & Maintenance
24
- Leave the technicalities to us! We offer seamless deployment services, ensuring your web and software solutions are launched efficiently with ongoing maintenance to keep everything running smoothly.
25
- Design Services
26
- First impressions matter! Our design team crafts intuitive, eye-catching designs that resonate with your brand’s identity. From branding to UI/UX design, we make sure your product looks as good as it performs.
27
- Database Management
28
- Your data is your most valuable asset. We offer comprehensive database solutions, from design and setup to optimization and management, ensuring secure, scalable, and efficient data handling.
29
- Custom Software
30
- From concept to execution, we build tailored software solutions that address your unique business needs. Whether it’s enterprise software or mobile apps, we deliver solutions that drive efficiency and growth.
31
-
32
- About Us
33
- At The Hexatech, we’re more than just a service provider—we’re your strategic partner in building and scaling your business. With a passion for innovation and a commitment to excellence, we deliver A-Z business solutions that empower companies to thrive in the digital age. Founded with the mission to simplify complex processes and accelerate growth, we specialize in everything from web development and AI to design, deployment, and custom software. Whether you’re a startup launching an MVP or an established business looking to redefine your product, we tailor our solutions to meet your unique needs. Our team of expert developers, designers, and strategists work closely with you to bring your vision to life. At The Hexatech, we believe in a hands-on approach, diving deep into customer development and market analysis to ensure that every solution we deliver is not only functional but also impactful. We’re here to help you Design, Develop, Dominate—because your success is our success.
34
-
35
- Testimonials
36
- "I can't say enough about the quality of work I have received. I highly recommend The Hexatech for anyone looking for an exceptional web development company."
 
 
 
 
 
 
 
 
 
 
37
  Raj Shaw
38
- moodmeter
39
- "The Hexatech has been a game changer for my business. Their team is incredibly skilled and responsive, and their work has been exceptional. I highly recommend The Hexatech for any web development & AI needs."
 
 
40
  Rishi Pipaliya
41
- Rose & Fern
42
- "The Hexatech team has been incredibly professional and responsive. They have been able to meet our needs and exceed our expectations."
 
 
43
  Jeet Ghosh
44
- LumaticAI
45
-
46
- How We Work
47
- 01
48
- Click on quote
49
- 02
50
- Submit the quote form
51
- 03
52
- We get in touch through meet
53
- 04
54
- Deal is finalized
55
- 05
56
- Project is delivered after payment
57
 
58
  Have a project in mind? Let's Talk
59
 
60
- Our Location
61
  Om Shayona Arcade, Gota, Ahmedabad, Gujarat, India, 382481
62
 
63
- Our Company
64
- At The Hexatech, we believe that every great business starts with a bold idea. We specialize in providing end-to-end business solutions that help entrepreneurs and businesses bring their visions to life. From designing unique digital experiences, to developing state-of-the-art software, to implementing cutting-edge AI and machine learning systems — we do it all.
65
- Our approach is holistic. We not only build and deploy, but also support businesses throughout their entire lifecycle. Whether you are launching a new product, refining an existing one, or optimizing for scalability, we ensure that every step is handled with care and precision. Our dedicated team of experts will work with you to innovate, develop, and grow your brand to achieve sustained market success.
66
- The Hexatech is your trusted partner for turning digital challenges into opportunities, combining creativity, technology, and business acumen to help you stay ahead of the competition. With our tailored solutions, we aim to transform your business and ensure it thrives in today’s fast-evolving digital world.
67
-
68
- Meet Our Founders
69
- Rohan Shaw
70
- Co-Founder & CEO
71
- https://instagram.com/the_rohanshaw
72
- https://linkedin.com/in/rohan-shaw-rs
73
- https://x.com/heyMeRohan
74
- Sujal Merani
75
- Co-Founder & CSO
76
- https://instagram.com/sujal__merani
77
- https://linkedin.com/in/sujal-merani-150316329
78
- https://x.com/msujal_21
79
- Fresil Patel
80
- Co-Founder & CMO
81
- https://instagram.com/fresil_patel
82
- https://linkedin.com/in/fresilpatel
83
- https://x.com/PatelFresil
84
-
85
  Contact
86
- Email : info@thehexatech.com
87
- Business : connect@thehexatech.com
88
- Phone : +91 9749525157 | +91 9727226136 | +91 8320372440
89
  Head Office :
90
  211, 2nd floor, Om Shayona Arcade, Gota, Ahmedabad, Gujarat, India
91
 
@@ -96,4 +79,12 @@ website : https://thehexatech.com
96
  LinkedIn: https://www.linkedin.com/company/the-hexatech/about/
97
  Instagram: https://www.instagram.com/thehexatech/
98
 
99
- © 2024, The Hexatech. All Rights Reserved.
 
 
 
 
 
 
 
 
 
1
+ Tagline : Data. Decision. Domination
 
 
2
 
3
+ - Data as a Service For Your Business.
4
+ - we build success stories for you, get your business a lucky touch
5
+
6
+ - Clients:
7
  Viridiv
8
  Healscure
9
  CodeLens
10
 
11
+ - Be the Achiever of Achievers
12
+ Transform raw web data into actionable intelligence with The Hexatech, your premier partner for Data-as-a-Service solutions. We handle the complexities of web scraping, data extraction, and validation, delivering clean, structured data and data APIs directly to you. Focus on insights, not infrastructure.
13
+ Let's power your decisions with precise data.
14
+
15
+ - What We Offer: Your Data, Streamlined
16
+ At The Hexatech, we specialize in delivering precise and scalable Data-as-a-Service solutions. We extract, process, and deliver the web data
17
+ you need to drive critical business decisions. Our expert team handles every complexity, from sophisticated web and app scraping to AI-powered data processing, ensuring you receive clean, actionable insights without the hassle.
18
+
19
+ - Web Scraping & Data Extraction
20
+ Get accurate, large-scale data from any public website. We handle dynamic content, anti-bot measures, and complex structures to deliver the
21
+ exact data points you need for market research, competitive analysis, and more.
22
+
23
+ - App Scraping & Mobile Data
24
+ Access valuable data from mobile applications. Our solutions extract information from public app listings, reviews, and other available app-based data sources, providing unique insights into mobile trends and user sentiment.
25
+
26
+ - Custom Scraping Solutions
27
+ Have unique data needs? We design and implement bespoke scraping solutions tailored to your specific requirements, ensuring you get precisely the data you're looking for, no matter the complexity or source.
28
+
29
+ - Scraping APIs & Real-time Feeds
30
+ Integrate directly with our robust APIs for real-time, on-demand data access. Set up continuous data feeds to keep your systems updated with the freshest information for dynamic pricing, instant alerts, and live monitoring.
31
+
32
+ - AI/ML Powered Data Processing
33
+ Beyond extraction, we leverage AI and Machine Learning to process, clean, and enrich your data. From sentiment analysis and categorization to deduplication and natural language processing, we turn raw data into intelligent insights.
34
+
35
+ - AI/ML Solutions
36
+ Unlock deeper insights with custom Artificial Intelligence and Machine Learning solutions. From predictive analytics and trend forecasting to sentiment analysis and intelligent automation, we build models that leverage your data to drive innovation.
37
+
38
+ - About The Hexatech
39
+ At The Hexatech, we are more than just a service provider — we are your dedicated partner in unlocking the true power of data. With a steadfast commitment to accuracy, scalability, and innovation, we deliver comprehensive Data-as-a-Service solutions designed to empower businesses in the digital age. Founded with the mission to simplify complex data acquisition and accelerate informed decision-making, we specialize in transforming vast amounts of raw web information into clean, structured, and actionable intelligence. Whether you're a startup seeking market insights or an established enterprise aiming for competitive advantage, we tailor our DaaS solutions to meet your unique and evolving data needs. Our team of expert data engineers, analysts, and AI/ML specialists works diligently to ensure precise data extraction, rigorous validation, and seamless delivery. At The Hexatech, we believe in a client-centric approach, diving deep into your specific requirements to provide data solutions that are not only reliable but also directly impactful to your strategic goals. We're here to help you Extract, Analyze,
40
+ Excel – because your data-driven success is our ultimate mission.
41
+
42
+ - What Our Clients Say About Our Data Solutions
43
+ "The data quality and the speed of delivery from TheHexaTech are simply unparalleled. Their web scraping services have provided us with invaluable market insights, giving us a significant competitive edge."
44
+ Moodmeter Logo
45
  Raj Shaw
46
+ Market Intelligence Lead, Moodmeter
47
+
48
+ "TheHexaTech has been a game-changer for our data strategy. Their custom scraping solutions and AI/ML expertise transformed how we gather and analyze information. Highly recommend them for any data needs!"
49
+ Rose & Fern Logo
50
  Rishi Pipaliya
51
+ Head of Product, Rose & Fern
52
+
53
+ "The Hexatech team is incredibly professional and responsive. They not only met our complex data extraction needs but also exceeded our expectations with their advanced data validation and seamless delivery."
54
+ LumaticAI Logo
55
  Jeet Ghosh
56
+ VP of Operations, LumaticAI
57
+
58
+ - Our Data Solutions Process
59
+ 01. Get in touch to discuss your specific data requirements, target sources, and desired output formats. We'll provide a custom quote.
60
+ 02. Our experts design a tailored data extraction solution, configuring crawlers and ensuring robust anti-blocking measures.
61
+ 03. We begin the high-volume data collection, applying rigorous quality checks and structuring the data for accuracy and usability.
62
+ 04. Receive your clean, structured data in your preferred format (API, CSV, JSON, etc.) directly to your systems or storage.
63
+ 05. We provide continuous monitoring, maintenance, and support to ensure your data feeds remain consistent and reliable.
 
 
 
 
 
64
 
65
  Have a project in mind? Let's Talk
66
 
67
+ - Our Location
68
  Om Shayona Arcade, Gota, Ahmedabad, Gujarat, India, 382481
69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  Contact
71
+ Email : thehexatech.official@gmail.com
 
 
72
  Head Office :
73
  211, 2nd floor, Om Shayona Arcade, Gota, Ahmedabad, Gujarat, India
74
 
 
79
  LinkedIn: https://www.linkedin.com/company/the-hexatech/about/
80
  Instagram: https://www.instagram.com/thehexatech/
81
 
82
+ © 2024, The Hexatech. All Rights Reserved.
83
+
84
+ - Our Company
85
+ At The Hexatech, we believe that precise, actionable data is the cornerstone of every successful modern business. We specialize in providing
86
+ comprehensive Data-as-a-Service (DaaS) solutions that empower entrepreneurs and established enterprises to harness the vast potential of web intelligence. From sophisticated web and app scraping to advanced AI and machine learning for data processing and insights – we manage the
87
+ entire data lifecycle for you.
88
+ Our approach is entirely client-focused. We not only extract and deliver high-quality data, but also serve as your strategic partner in transforming raw information into a competitive advantage. Whether you are seeking real-time market trends, competitive pricing intelligence, lead generation, or custom data sets for analytical models, we ensure every data point is accurate, reliable, and tailored to your specific needs.
89
+ The Hexatech is your trusted ally for navigating the complexities of big data, turning digital noise into clear opportunities. Our dedicated
90
+ team of data experts combines cutting-edge technology with deep business acumen to help you stay ahead in today’s data-intensive world. With our managed DaaS solutions, we aim to streamline your data acquisition process, allowing you to focus on innovation and growth.
requirements.txt CHANGED
@@ -1,11 +1,11 @@
1
- langchain==0.1.6
2
- langchain-community==0.0.19
3
- langchain-core==0.1.23
4
- pinecone-client
5
- python-dotenv
6
- fastapi
7
- langchain_google_genai
8
- langchain-pinecone
9
- chardet
10
- uvicorn
11
- unstructured
 
1
+ langchain
2
+ langchain-community
3
+ langchain-core
4
+ langchain-unstructured
5
+ pinecone-client
6
+ python-dotenv
7
+ langchain_google_genai
8
+ langchain-pinecone
9
+ chardet
10
+ uvicorn
11
+ fastapi
scrapWebpage.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+
4
+ # def load_from_website(url):
5
+ # response = requests.get(url)
6
+ # soup = BeautifulSoup(response.content, 'html.parser')
7
+ # text = soup.get_text(separator="\n")
8
+ # return [text]
9
+
10
+ # print(load_from_website("https://thehexatech.com"))
11
+ # print()
12
+ # print(load_from_website("https://thehexatech.com/about/index.html"))
13
+ # print()
14
+ # print(load_from_website("https://thehexatech.com/quote/index.html"))
15
+ # print()
16
+
17
+
18
+ import asyncio
19
+ from langchain_unstructured import UnstructuredLoader
20
+
21
+ page_url = "https://thehexatech.com/about"
22
+ loader = UnstructuredLoader(web_url=page_url)
23
+
24
+ docs = []
25
+
26
+ async def get_data():
27
+ global docs
28
+ async for doc in loader.alazy_load():
29
+ docs.append(doc)
30
+
31
+ async def main():
32
+ await get_data()
33
+ # print(docs)
34
+ for doc in docs:
35
+ print(doc.page_content)
36
+
37
+ asyncio.run(main())
38
+
39
+
test.ipynb ADDED
@@ -0,0 +1,791 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import asyncio\n",
10
+ "from langchain_unstructured import UnstructuredLoader"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 6,
16
+ "metadata": {},
17
+ "outputs": [],
18
+ "source": [
19
+ "page_url = [\"https://thehexatech.com\", \"https://thehexatech.com/about/index.html\", \"https://thehexatech.com/quote/index.html\"] "
20
+ ]
21
+ },
22
+ {
23
+ "cell_type": "code",
24
+ "execution_count": 16,
25
+ "metadata": {},
26
+ "outputs": [],
27
+ "source": [
28
+ "async def get_data():\n",
29
+ " docs = []\n",
30
+ " for url in page_url:\n",
31
+ " loader = UnstructuredLoader(web_url=url)\n",
32
+ " async for doc in loader.alazy_load():\n",
33
+ " docs.append(doc)\n",
34
+ " return docs"
35
+ ]
36
+ },
37
+ {
38
+ "cell_type": "code",
39
+ "execution_count": 17,
40
+ "metadata": {},
41
+ "outputs": [],
42
+ "source": [
43
+ "try:\n",
44
+ " dataset = await get_data()\n",
45
+ "except RuntimeError:\n",
46
+ " print(\"This environment may not support 'await' outside of an async context.\")"
47
+ ]
48
+ },
49
+ {
50
+ "cell_type": "code",
51
+ "execution_count": 33,
52
+ "metadata": {},
53
+ "outputs": [
54
+ {
55
+ "name": "stdout",
56
+ "output_type": "stream",
57
+ "text": [
58
+ "category_depth\n",
59
+ "languages\n",
60
+ "filetype\n",
61
+ "url\n",
62
+ "category\n",
63
+ "element_id\n",
64
+ "category_depth\n",
65
+ "languages\n",
66
+ "filetype\n",
67
+ "url\n",
68
+ "category\n",
69
+ "element_id\n",
70
+ "languages\n",
71
+ "filetype\n",
72
+ "parent_id\n",
73
+ "url\n",
74
+ "category\n",
75
+ "element_id\n",
76
+ "category_depth\n",
77
+ "languages\n",
78
+ "filetype\n",
79
+ "url\n",
80
+ "category\n",
81
+ "element_id\n",
82
+ "category_depth\n",
83
+ "languages\n",
84
+ "filetype\n",
85
+ "url\n",
86
+ "category\n",
87
+ "element_id\n",
88
+ "category_depth\n",
89
+ "languages\n",
90
+ "filetype\n",
91
+ "url\n",
92
+ "category\n",
93
+ "element_id\n",
94
+ "category_depth\n",
95
+ "languages\n",
96
+ "filetype\n",
97
+ "url\n",
98
+ "category\n",
99
+ "element_id\n",
100
+ "category_depth\n",
101
+ "languages\n",
102
+ "filetype\n",
103
+ "url\n",
104
+ "category\n",
105
+ "element_id\n",
106
+ "category_depth\n",
107
+ "languages\n",
108
+ "filetype\n",
109
+ "url\n",
110
+ "category\n",
111
+ "element_id\n",
112
+ "category_depth\n",
113
+ "languages\n",
114
+ "filetype\n",
115
+ "parent_id\n",
116
+ "url\n",
117
+ "category\n",
118
+ "element_id\n",
119
+ "languages\n",
120
+ "filetype\n",
121
+ "parent_id\n",
122
+ "url\n",
123
+ "category\n",
124
+ "element_id\n",
125
+ "languages\n",
126
+ "filetype\n",
127
+ "parent_id\n",
128
+ "url\n",
129
+ "category\n",
130
+ "element_id\n",
131
+ "category_depth\n",
132
+ "languages\n",
133
+ "filetype\n",
134
+ "parent_id\n",
135
+ "url\n",
136
+ "category\n",
137
+ "element_id\n",
138
+ "languages\n",
139
+ "filetype\n",
140
+ "parent_id\n",
141
+ "url\n",
142
+ "category\n",
143
+ "element_id\n",
144
+ "category_depth\n",
145
+ "languages\n",
146
+ "filetype\n",
147
+ "parent_id\n",
148
+ "url\n",
149
+ "category\n",
150
+ "element_id\n",
151
+ "languages\n",
152
+ "filetype\n",
153
+ "parent_id\n",
154
+ "url\n",
155
+ "category\n",
156
+ "element_id\n",
157
+ "category_depth\n",
158
+ "languages\n",
159
+ "filetype\n",
160
+ "parent_id\n",
161
+ "url\n",
162
+ "category\n",
163
+ "element_id\n",
164
+ "languages\n",
165
+ "filetype\n",
166
+ "parent_id\n",
167
+ "url\n",
168
+ "category\n",
169
+ "element_id\n",
170
+ "category_depth\n",
171
+ "languages\n",
172
+ "filetype\n",
173
+ "parent_id\n",
174
+ "url\n",
175
+ "category\n",
176
+ "element_id\n",
177
+ "languages\n",
178
+ "filetype\n",
179
+ "parent_id\n",
180
+ "url\n",
181
+ "category\n",
182
+ "element_id\n",
183
+ "category_depth\n",
184
+ "languages\n",
185
+ "filetype\n",
186
+ "parent_id\n",
187
+ "url\n",
188
+ "category\n",
189
+ "element_id\n",
190
+ "languages\n",
191
+ "filetype\n",
192
+ "parent_id\n",
193
+ "url\n",
194
+ "category\n",
195
+ "element_id\n",
196
+ "category_depth\n",
197
+ "languages\n",
198
+ "filetype\n",
199
+ "parent_id\n",
200
+ "url\n",
201
+ "category\n",
202
+ "element_id\n",
203
+ "languages\n",
204
+ "filetype\n",
205
+ "parent_id\n",
206
+ "url\n",
207
+ "category\n",
208
+ "element_id\n",
209
+ "category_depth\n",
210
+ "languages\n",
211
+ "filetype\n",
212
+ "parent_id\n",
213
+ "url\n",
214
+ "category\n",
215
+ "element_id\n",
216
+ "languages\n",
217
+ "filetype\n",
218
+ "parent_id\n",
219
+ "url\n",
220
+ "category\n",
221
+ "element_id\n",
222
+ "category_depth\n",
223
+ "languages\n",
224
+ "filetype\n",
225
+ "parent_id\n",
226
+ "url\n",
227
+ "category\n",
228
+ "element_id\n",
229
+ "languages\n",
230
+ "filetype\n",
231
+ "parent_id\n",
232
+ "url\n",
233
+ "category\n",
234
+ "element_id\n",
235
+ "category_depth\n",
236
+ "languages\n",
237
+ "filetype\n",
238
+ "parent_id\n",
239
+ "url\n",
240
+ "category\n",
241
+ "element_id\n",
242
+ "category_depth\n",
243
+ "languages\n",
244
+ "filetype\n",
245
+ "parent_id\n",
246
+ "url\n",
247
+ "category\n",
248
+ "element_id\n",
249
+ "category_depth\n",
250
+ "languages\n",
251
+ "filetype\n",
252
+ "parent_id\n",
253
+ "url\n",
254
+ "category\n",
255
+ "element_id\n",
256
+ "category_depth\n",
257
+ "languages\n",
258
+ "filetype\n",
259
+ "url\n",
260
+ "category\n",
261
+ "element_id\n",
262
+ "category_depth\n",
263
+ "languages\n",
264
+ "filetype\n",
265
+ "parent_id\n",
266
+ "url\n",
267
+ "category\n",
268
+ "element_id\n",
269
+ "category_depth\n",
270
+ "languages\n",
271
+ "filetype\n",
272
+ "parent_id\n",
273
+ "url\n",
274
+ "category\n",
275
+ "element_id\n",
276
+ "category_depth\n",
277
+ "languages\n",
278
+ "filetype\n",
279
+ "url\n",
280
+ "category\n",
281
+ "element_id\n",
282
+ "category_depth\n",
283
+ "languages\n",
284
+ "filetype\n",
285
+ "parent_id\n",
286
+ "url\n",
287
+ "category\n",
288
+ "element_id\n",
289
+ "category_depth\n",
290
+ "languages\n",
291
+ "filetype\n",
292
+ "parent_id\n",
293
+ "url\n",
294
+ "category\n",
295
+ "element_id\n",
296
+ "category_depth\n",
297
+ "languages\n",
298
+ "filetype\n",
299
+ "url\n",
300
+ "category\n",
301
+ "element_id\n",
302
+ "category_depth\n",
303
+ "languages\n",
304
+ "filetype\n",
305
+ "parent_id\n",
306
+ "url\n",
307
+ "category\n",
308
+ "element_id\n",
309
+ "category_depth\n",
310
+ "languages\n",
311
+ "filetype\n",
312
+ "parent_id\n",
313
+ "url\n",
314
+ "category\n",
315
+ "element_id\n",
316
+ "category_depth\n",
317
+ "languages\n",
318
+ "filetype\n",
319
+ "url\n",
320
+ "category\n",
321
+ "element_id\n",
322
+ "category_depth\n",
323
+ "languages\n",
324
+ "filetype\n",
325
+ "parent_id\n",
326
+ "url\n",
327
+ "category\n",
328
+ "element_id\n",
329
+ "category_depth\n",
330
+ "languages\n",
331
+ "filetype\n",
332
+ "url\n",
333
+ "category\n",
334
+ "element_id\n",
335
+ "category_depth\n",
336
+ "languages\n",
337
+ "filetype\n",
338
+ "parent_id\n",
339
+ "url\n",
340
+ "category\n",
341
+ "element_id\n",
342
+ "languages\n",
343
+ "filetype\n",
344
+ "parent_id\n",
345
+ "url\n",
346
+ "category\n",
347
+ "element_id\n",
348
+ "category_depth\n",
349
+ "languages\n",
350
+ "filetype\n",
351
+ "parent_id\n",
352
+ "url\n",
353
+ "category\n",
354
+ "element_id\n",
355
+ "languages\n",
356
+ "filetype\n",
357
+ "parent_id\n",
358
+ "url\n",
359
+ "category\n",
360
+ "element_id\n",
361
+ "category_depth\n",
362
+ "languages\n",
363
+ "filetype\n",
364
+ "parent_id\n",
365
+ "url\n",
366
+ "category\n",
367
+ "element_id\n",
368
+ "languages\n",
369
+ "filetype\n",
370
+ "parent_id\n",
371
+ "url\n",
372
+ "category\n",
373
+ "element_id\n",
374
+ "category_depth\n",
375
+ "languages\n",
376
+ "filetype\n",
377
+ "url\n",
378
+ "category\n",
379
+ "element_id\n",
380
+ "category_depth\n",
381
+ "languages\n",
382
+ "filetype\n",
383
+ "parent_id\n",
384
+ "url\n",
385
+ "category\n",
386
+ "element_id\n",
387
+ "category_depth\n",
388
+ "languages\n",
389
+ "filetype\n",
390
+ "parent_id\n",
391
+ "url\n",
392
+ "category\n",
393
+ "element_id\n",
394
+ "category_depth\n",
395
+ "languages\n",
396
+ "filetype\n",
397
+ "url\n",
398
+ "category\n",
399
+ "element_id\n",
400
+ "category_depth\n",
401
+ "link_texts\n",
402
+ "link_urls\n",
403
+ "languages\n",
404
+ "filetype\n",
405
+ "url\n",
406
+ "category\n",
407
+ "element_id\n",
408
+ "category_depth\n",
409
+ "link_texts\n",
410
+ "link_urls\n",
411
+ "languages\n",
412
+ "filetype\n",
413
+ "url\n",
414
+ "category\n",
415
+ "element_id\n",
416
+ "languages\n",
417
+ "filetype\n",
418
+ "parent_id\n",
419
+ "url\n",
420
+ "category\n",
421
+ "element_id\n",
422
+ "category_depth\n",
423
+ "languages\n",
424
+ "filetype\n",
425
+ "url\n",
426
+ "category\n",
427
+ "element_id\n",
428
+ "languages\n",
429
+ "filetype\n",
430
+ "parent_id\n",
431
+ "url\n",
432
+ "category\n",
433
+ "element_id\n",
434
+ "languages\n",
435
+ "filetype\n",
436
+ "parent_id\n",
437
+ "url\n",
438
+ "category\n",
439
+ "element_id\n",
440
+ "languages\n",
441
+ "filetype\n",
442
+ "parent_id\n",
443
+ "url\n",
444
+ "category\n",
445
+ "element_id\n",
446
+ "category_depth\n",
447
+ "languages\n",
448
+ "filetype\n",
449
+ "parent_id\n",
450
+ "url\n",
451
+ "category\n",
452
+ "element_id\n",
453
+ "category_depth\n",
454
+ "languages\n",
455
+ "filetype\n",
456
+ "parent_id\n",
457
+ "url\n",
458
+ "category\n",
459
+ "element_id\n",
460
+ "category_depth\n",
461
+ "languages\n",
462
+ "filetype\n",
463
+ "url\n",
464
+ "category\n",
465
+ "element_id\n",
466
+ "category_depth\n",
467
+ "languages\n",
468
+ "filetype\n",
469
+ "parent_id\n",
470
+ "url\n",
471
+ "category\n",
472
+ "element_id\n",
473
+ "category_depth\n",
474
+ "languages\n",
475
+ "filetype\n",
476
+ "url\n",
477
+ "category\n",
478
+ "element_id\n",
479
+ "category_depth\n",
480
+ "languages\n",
481
+ "filetype\n",
482
+ "parent_id\n",
483
+ "url\n",
484
+ "category\n",
485
+ "element_id\n",
486
+ "category_depth\n",
487
+ "languages\n",
488
+ "filetype\n",
489
+ "url\n",
490
+ "category\n",
491
+ "element_id\n",
492
+ "category_depth\n",
493
+ "languages\n",
494
+ "filetype\n",
495
+ "parent_id\n",
496
+ "url\n",
497
+ "category\n",
498
+ "element_id\n",
499
+ "category_depth\n",
500
+ "link_texts\n",
501
+ "link_urls\n",
502
+ "languages\n",
503
+ "filetype\n",
504
+ "parent_id\n",
505
+ "url\n",
506
+ "category\n",
507
+ "element_id\n",
508
+ "category_depth\n",
509
+ "link_texts\n",
510
+ "link_urls\n",
511
+ "languages\n",
512
+ "filetype\n",
513
+ "parent_id\n",
514
+ "url\n",
515
+ "category\n",
516
+ "element_id\n",
517
+ "category_depth\n",
518
+ "languages\n",
519
+ "filetype\n",
520
+ "parent_id\n",
521
+ "url\n",
522
+ "category\n",
523
+ "element_id\n",
524
+ "category_depth\n",
525
+ "languages\n",
526
+ "filetype\n",
527
+ "url\n",
528
+ "category\n",
529
+ "element_id\n",
530
+ "category_depth\n",
531
+ "languages\n",
532
+ "filetype\n",
533
+ "url\n",
534
+ "category\n",
535
+ "element_id\n",
536
+ "languages\n",
537
+ "filetype\n",
538
+ "parent_id\n",
539
+ "url\n",
540
+ "category\n",
541
+ "element_id\n",
542
+ "category_depth\n",
543
+ "languages\n",
544
+ "filetype\n",
545
+ "parent_id\n",
546
+ "url\n",
547
+ "category\n",
548
+ "element_id\n",
549
+ "category_depth\n",
550
+ "languages\n",
551
+ "filetype\n",
552
+ "parent_id\n",
553
+ "url\n",
554
+ "category\n",
555
+ "element_id\n",
556
+ "category_depth\n",
557
+ "languages\n",
558
+ "filetype\n",
559
+ "url\n",
560
+ "category\n",
561
+ "element_id\n",
562
+ "category_depth\n",
563
+ "link_texts\n",
564
+ "link_urls\n",
565
+ "languages\n",
566
+ "filetype\n",
567
+ "url\n",
568
+ "category\n",
569
+ "element_id\n",
570
+ "category_depth\n",
571
+ "link_texts\n",
572
+ "link_urls\n",
573
+ "languages\n",
574
+ "filetype\n",
575
+ "url\n",
576
+ "category\n",
577
+ "element_id\n",
578
+ "languages\n",
579
+ "filetype\n",
580
+ "parent_id\n",
581
+ "url\n",
582
+ "category\n",
583
+ "element_id\n",
584
+ "category_depth\n",
585
+ "languages\n",
586
+ "filetype\n",
587
+ "url\n",
588
+ "category\n",
589
+ "element_id\n",
590
+ "category_depth\n",
591
+ "languages\n",
592
+ "filetype\n",
593
+ "url\n",
594
+ "category\n",
595
+ "element_id\n",
596
+ "category_depth\n",
597
+ "languages\n",
598
+ "filetype\n",
599
+ "url\n",
600
+ "category\n",
601
+ "element_id\n",
602
+ "languages\n",
603
+ "filetype\n",
604
+ "parent_id\n",
605
+ "url\n",
606
+ "category\n",
607
+ "element_id\n",
608
+ "category_depth\n",
609
+ "languages\n",
610
+ "filetype\n",
611
+ "parent_id\n",
612
+ "url\n",
613
+ "category\n",
614
+ "element_id\n",
615
+ "category_depth\n",
616
+ "languages\n",
617
+ "filetype\n",
618
+ "parent_id\n",
619
+ "url\n",
620
+ "category\n",
621
+ "element_id\n",
622
+ "category_depth\n",
623
+ "languages\n",
624
+ "filetype\n",
625
+ "url\n",
626
+ "category\n",
627
+ "element_id\n",
628
+ "category_depth\n",
629
+ "link_texts\n",
630
+ "link_urls\n",
631
+ "languages\n",
632
+ "filetype\n",
633
+ "url\n",
634
+ "category\n",
635
+ "element_id\n",
636
+ "category_depth\n",
637
+ "link_texts\n",
638
+ "link_urls\n",
639
+ "languages\n",
640
+ "filetype\n",
641
+ "url\n",
642
+ "category\n",
643
+ "element_id\n",
644
+ "languages\n",
645
+ "filetype\n",
646
+ "parent_id\n",
647
+ "url\n",
648
+ "category\n",
649
+ "element_id\n"
650
+ ]
651
+ }
652
+ ],
653
+ "source": [
654
+ "for data in dataset:\n",
655
+ " # print(f\"{data.page_content} \\n {data.link_texts if data.link_texts else \"\"} -------- {data.link_urls if data.link_urls else \"\"} \")\n",
656
+ " for mt in data.metadata:\n",
657
+ " print(mt)"
658
+ ]
659
+ },
660
+ {
661
+ "cell_type": "code",
662
+ "execution_count": 19,
663
+ "metadata": {},
664
+ "outputs": [
665
+ {
666
+ "data": {
667
+ "text/plain": [
668
+ "[Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '3c1cec20a25849e446035e7a95eb6b54'}, page_content='Design. Develop. Deploy'),\n",
669
+ " Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': 'a9be7f1e667e1a81d4fec6bd42699600'}, page_content='Hexa Core Services For Your Business.'),\n",
670
+ " Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'a9be7f1e667e1a81d4fec6bd42699600', 'url': 'https://thehexatech.com', 'category': 'NarrativeText', 'element_id': 'e76565fb197dfa95003d0f3bae11010d'}, page_content='we build success stories for you, get your business a lucky touch'),\n",
671
+ " Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '572a046c5cc30997de111b1b10785237'}, page_content='Viridiv'),\n",
672
+ " Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '65184b4439938fb6b4bc6e5d337866fc'}, page_content='Healscure'),\n",
673
+ " Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '3831b9a9f6395c6e2ea6878650243a46'}, page_content='CodeLens'),\n",
674
+ " Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '942ae29a509225086d1c4f2f57fb3f98'}, page_content='Viridiv'),\n",
675
+ " Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '77ddcd23639bec902ea3619f141696db'}, page_content='Healscure'),\n",
676
+ " Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': 'a8321dedec3fe8899162f1b272783251'}, page_content='CodeLens'),\n",
677
+ " Document(metadata={'category_depth': 1, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'a8321dedec3fe8899162f1b272783251', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': 'dc4cbd6254cb64898decd517056100b3'}, page_content='Be the Achievers of Achievers'),\n",
678
+ " Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'dc4cbd6254cb64898decd517056100b3', 'url': 'https://thehexatech.com', 'category': 'NarrativeText', 'element_id': '464461078fbed9c7787522bcb270409d'}, page_content='Unlock the full potential of your business with The Hexatech, your all-in-one partner for A-Z business solutions. From stunning designs to powerful development, strategic marketing, seamless deployment, and expert SEO—we’re here to turn your vision into reality.'),\n",
679
+ " Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'dc4cbd6254cb64898decd517056100b3', 'url': 'https://thehexatech.com', 'category': 'NarrativeText', 'element_id': 'bb46e06fb19a60f7faa59b09b06a0f0b'}, page_content='Let’s build your success story together.'),\n",
680
+ " Document(metadata={'category_depth': 1, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'a8321dedec3fe8899162f1b272783251', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': 'a813a122ea83dfb365f3f8424aba978c'}, page_content='What we offer'),\n",
681
+ " Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'a813a122ea83dfb365f3f8424aba978c', 'url': 'https://thehexatech.com', 'category': 'NarrativeText', 'element_id': '8cd09a13b4f8df03d844d349b7fe2748'}, page_content='At The Hexatech, we provide end-to-end business solutions tailored to meet your unique goals. From building cutting-edge websites to harnessing the power of AI, we’ve got you covered. Our expert team ensures that your ideas turn into reality with seamless deployment, stunning designs, robust databases, and custom software solutions. Let us take your business to the next level with innovative technology and creative strategies.'),\n",
682
+ " Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'a813a122ea83dfb365f3f8424aba978c', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': 'b25451e0a7dc77c405a8156e32f9200a'}, page_content='Web Development'),\n",
683
+ " Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'b25451e0a7dc77c405a8156e32f9200a', 'url': 'https://thehexatech.com', 'category': 'NarrativeText', 'element_id': '13eec4f02e0fd3d7824e7cc8c26f7a0f'}, page_content=\"We create high-performance, responsive websites that are not only visually stunning but also optimized for functionality. Whether it's a corporate site or an e-commerce platform, we bring your online presence to life.\"),\n",
684
+ " Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'a813a122ea83dfb365f3f8424aba978c', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': 'ef0b6aa953694b1cb1caf329c0263b5c'}, page_content='AI & Machine Learning'),\n",
685
+ " Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'ef0b6aa953694b1cb1caf329c0263b5c', 'url': 'https://thehexatech.com', 'category': 'NarrativeText', 'element_id': 'c93d8d94014b39c79538e77764d06774'}, page_content='Unlock the future with AI and machine learning solutions that help you automate processes, analyze data, and make smarter decisions. From predictive analytics to personalized customer experiences, we’ll help you stay ahead of the curve.'),\n",
686
+ " Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'a813a122ea83dfb365f3f8424aba978c', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '0991c56dd134d345ff8d6e16ffd7f768'}, page_content='Deployment & Maintenance'),\n",
687
+ " Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '0991c56dd134d345ff8d6e16ffd7f768', 'url': 'https://thehexatech.com', 'category': 'NarrativeText', 'element_id': 'df7477130921e1d1a646e57784e30878'}, page_content='Leave the technicalities to us! We offer seamless deployment services, ensuring your web and software solutions are launched efficiently with ongoing maintenance to keep everything running smoothly.'),\n",
688
+ " Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'a813a122ea83dfb365f3f8424aba978c', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '9f8b94bb45907e6adca40628ce5d6852'}, page_content='Design Services'),\n",
689
+ " Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '9f8b94bb45907e6adca40628ce5d6852', 'url': 'https://thehexatech.com', 'category': 'NarrativeText', 'element_id': '7881ffb9d25e9f4121467278c8a06dc6'}, page_content='First impressions matter! Our design team crafts intuitive, eye-catching designs that resonate with your brand’s identity. From branding to UI/UX design, we make sure your product looks as good as it performs.'),\n",
690
+ " Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'a813a122ea83dfb365f3f8424aba978c', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '478986103f66efcb7df645b940f29e7f'}, page_content='Database Management'),\n",
691
+ " Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '478986103f66efcb7df645b940f29e7f', 'url': 'https://thehexatech.com', 'category': 'NarrativeText', 'element_id': '9bd9a46865c444f28f4b5a915c3644db'}, page_content='Your data is your most valuable asset. We offer comprehensive database solutions, from design and setup to optimization and management, ensuring secure, scalable, and efficient data handling.'),\n",
692
+ " Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'a813a122ea83dfb365f3f8424aba978c', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': 'ef57a5666f7e4af366fb092a663f9608'}, page_content='Custom Software'),\n",
693
+ " Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'ef57a5666f7e4af366fb092a663f9608', 'url': 'https://thehexatech.com', 'category': 'NarrativeText', 'element_id': 'bce6bffd80584db9a2b56f80abd81e3a'}, page_content='From concept to execution, we build tailored software solutions that address your unique business needs. Whether it’s enterprise software or mobile apps, we deliver solutions that drive efficiency and growth.'),\n",
694
+ " Document(metadata={'category_depth': 1, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'a8321dedec3fe8899162f1b272783251', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '938668ef3561d6b1360299c2d6d5a807'}, page_content='About Us'),\n",
695
+ " Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '938668ef3561d6b1360299c2d6d5a807', 'url': 'https://thehexatech.com', 'category': 'NarrativeText', 'element_id': '70d631dd6329e150544527dc9c6c9c35'}, page_content='At The Hexatech, we’re more than just a service provider—we’re your strategic partner in building and scaling your business. With a passion for innovation and a commitment to excellence, we deliver A-Z business solutions that empower companies to thrive in the digital age. Founded with the mission to simplify complex processes and accelerate growth, we specialize in everything from web development and AI to design, deployment, and custom software. Whether you’re a startup launching an MVP or an established business looking to redefine your product, we tailor our solutions to meet your unique needs. Our team of expert developers, designers, and strategists work closely with you to bring your vision to life. At The Hexatech, we believe in a hands-on approach, diving deep into customer development and market analysis to ensure that every solution we deliver is not only functional but also impactful. We’re here to help you Design, Develop, Dominate—because your success is our success.'),\n",
696
+ " Document(metadata={'category_depth': 1, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'a8321dedec3fe8899162f1b272783251', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': 'bcf2561eb44dc7c53d51afd41165301a'}, page_content='Testimonials'),\n",
697
+ " Document(metadata={'category_depth': 3, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'bcf2561eb44dc7c53d51afd41165301a', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '86f5f12762496f3571c7892e3ba135a9'}, page_content='\"I can\\'t say enough about the quality of work I have received. I highly recommend The Hexatech for anyone looking for an exceptional web development company.\"'),\n",
698
+ " Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'bcf2561eb44dc7c53d51afd41165301a', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': 'daccc2be3cbbbae5fa1d1cea8b44bd53'}, page_content='Raj Shaw'),\n",
699
+ " Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '022e783cb1b79b0a4f0f93bb04e41f2b'}, page_content='moodmeter'),\n",
700
+ " Document(metadata={'category_depth': 3, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '022e783cb1b79b0a4f0f93bb04e41f2b', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '94e02c7aea78a3648a772f0cf4f2320b'}, page_content='\"The Hexatech has been a game changer for my business. Their team is incredibly skilled and responsive, and their work has been exceptional. I highly recommend The Hexatech for any web development & AI needs.\"'),\n",
701
+ " Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '022e783cb1b79b0a4f0f93bb04e41f2b', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '187b2131a84bd4710fafc95df4827972'}, page_content='Rishi Pipaliya'),\n",
702
+ " Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '643eab2a84465c21396eba4da3754638'}, page_content='Rose & Fern'),\n",
703
+ " Document(metadata={'category_depth': 3, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '643eab2a84465c21396eba4da3754638', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '94a56bfb26ec626221542aca30b92e31'}, page_content='\"The Hexatech team has been incredibly professional and responsive. They have been able to meet our needs and exceed our expectations.\"'),\n",
704
+ " Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '643eab2a84465c21396eba4da3754638', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '4c4c51cb803d93f20260a5d37f405ebb'}, page_content='Jeet Ghosh'),\n",
705
+ " Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '019af31e2271b35e3f6304bef67a2325'}, page_content='LumaticAI'),\n",
706
+ " Document(metadata={'category_depth': 1, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '019af31e2271b35e3f6304bef67a2325', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '5a8cbcc5d057a3c31215b56f56eebe80'}, page_content='How We Work'),\n",
707
+ " Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '5a8cbcc5d057a3c31215b56f56eebe80', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': 'b6b7d000c4c0b142c45e691491e2be66'}, page_content='01'),\n",
708
+ " Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '2df4608e232cfde989b8d2c73a921d5e'}, page_content='Click on quote'),\n",
709
+ " Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '2df4608e232cfde989b8d2c73a921d5e', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': 'a20a3cdb6b78e4c93999a1d977b832a2'}, page_content='02'),\n",
710
+ " Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '033563c87be9c8b842fef65fa51eb87c'}, page_content='Submit the quote form'),\n",
711
+ " Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '033563c87be9c8b842fef65fa51eb87c', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': 'bcb091bdbae90710a3cfc43fecc32854'}, page_content='03'),\n",
712
+ " Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'bcb091bdbae90710a3cfc43fecc32854', 'url': 'https://thehexatech.com', 'category': 'NarrativeText', 'element_id': 'f570c1fa7f254209fc40301203fc352d'}, page_content='We get in touch through meet'),\n",
713
+ " Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '033563c87be9c8b842fef65fa51eb87c', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '76b8fe7da31a1c90ca3f7822d11ef24c'}, page_content='04'),\n",
714
+ " Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '76b8fe7da31a1c90ca3f7822d11ef24c', 'url': 'https://thehexatech.com', 'category': 'NarrativeText', 'element_id': 'fc6d45688159d48648384deb3602224e'}, page_content='Deal is finalized'),\n",
715
+ " Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '033563c87be9c8b842fef65fa51eb87c', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': 'cb85bb1a427444c57f23259910a63271'}, page_content='05'),\n",
716
+ " Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'cb85bb1a427444c57f23259910a63271', 'url': 'https://thehexatech.com', 'category': 'NarrativeText', 'element_id': 'b3dfc81aee234a242f679415f694e71e'}, page_content='Project is delivered after payment'),\n",
717
+ " Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': 'de7e4a85336d0f00a0c0af354115d06d'}, page_content='Click Here'),\n",
718
+ " Document(metadata={'category_depth': 1, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'de7e4a85336d0f00a0c0af354115d06d', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': 'f2542a84aa4bdfba53ecb1b944cd1087'}, page_content=\"Have a project in mind? Let's Talk\"),\n",
719
+ " Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'f2542a84aa4bdfba53ecb1b944cd1087', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': 'f8caf8ca5810b78c46d4bc3c2b0fd8f9'}, page_content='Our Location'),\n",
720
+ " Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '6f24b27afe3e5f383363f470d3738f24'}, page_content='Om Shayona Arcade, Gota, Ahmedabad, Gujarat, India, 382481'),\n",
721
+ " Document(metadata={'category_depth': 0, 'link_texts': ['Home', 'About', 'Services', 'Testimonials', 'Quote'], 'link_urls': ['#page1', '/about/index.html', '#page1Box2', '#testimonials', '/quote/index.html'], 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': 'c88f8facd545bb6e8a062d13982f2ebe'}, page_content='Home About Services Testimonials Quote'),\n",
722
+ " Document(metadata={'category_depth': 0, 'link_texts': ['LinkedIn', 'Instagram', 'Email', 'Phone'], 'link_urls': ['https://www.linkedin.com/company/the-hexatech/about/', 'https://www.instagram.com/thehexatech/', '/cdn-cgi/l/email-protection#5024383538352831243533387e3f3636393339313c10373d31393c7e333f3d', 'tel:+91 9749525157'], 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '4334e83dc625417d783e2a7a75ee8540'}, page_content='LinkedIn Instagram Email Phone'),\n",
723
+ " Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '4334e83dc625417d783e2a7a75ee8540', 'url': 'https://thehexatech.com', 'category': 'NarrativeText', 'element_id': '8cbd7e7952b30d4ed3472272e6e0e5a5'}, page_content='© 2024, The Hexatech. All Rights Reserved.'),\n",
724
+ " Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': '9d0357eb48545a3eb0190751f5ac58f6'}, page_content='Our Company'),\n",
725
+ " Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '9d0357eb48545a3eb0190751f5ac58f6', 'url': 'https://thehexatech.com/about/index.html', 'category': 'NarrativeText', 'element_id': '2fb42adc7b5e984d33d83cc5f9533737'}, page_content='At The Hexatech, we believe that every great business starts with a bold idea. We specialize in providing end-to-end business solutions that help entrepreneurs and businesses bring their visions to life. From designing unique digital experiences, to developing state-of-the-art software, to implementing cutting-edge AI and machine learning systems — we do it all.'),\n",
726
+ " Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '9d0357eb48545a3eb0190751f5ac58f6', 'url': 'https://thehexatech.com/about/index.html', 'category': 'NarrativeText', 'element_id': '6eea97466a12a2eb4691a7bf4dd7dbd1'}, page_content='Our approach is holistic. We not only build and deploy, but also support businesses throughout their entire lifecycle. Whether you are launching a new product, refining an existing one, or optimizing for scalability, we ensure that every step is handled with care and precision. Our dedicated team of experts will work with you to innovate, develop, and grow your brand to achieve sustained market success.'),\n",
727
+ " Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '9d0357eb48545a3eb0190751f5ac58f6', 'url': 'https://thehexatech.com/about/index.html', 'category': 'NarrativeText', 'element_id': '0a688be6002688e1bd236b83c6685328'}, page_content='The Hexatech is your trusted partner for turning digital challenges into opportunities, combining creativity, technology, and business acumen to help you stay ahead of the competition. With our tailored solutions, we aim to transform your business and ensure it thrives in today’s fast-evolving digital world.'),\n",
728
+ " Document(metadata={'category_depth': 1, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '9d0357eb48545a3eb0190751f5ac58f6', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': '30e38d2f2cc891f1ce1d512ad55a5d4e'}, page_content='Meet Our Founders'),\n",
729
+ " Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '30e38d2f2cc891f1ce1d512ad55a5d4e', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': 'd0ef3163f0a426ee1ddf8082a05eaae1'}, page_content='Rohan Shaw'),\n",
730
+ " Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': '864b9fd57ef3c68cf9c1beaa71717167'}, page_content='Co-Founder & CEO'),\n",
731
+ " Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '864b9fd57ef3c68cf9c1beaa71717167', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': 'b04d02f04d097bddd50214ed1411f9b4'}, page_content='Sujal Merani'),\n",
732
+ " Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': '38575392064a16ad2e677971c65909af'}, page_content='Co-Founder & CSO'),\n",
733
+ " Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '38575392064a16ad2e677971c65909af', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': '76302995c4c37fa70c313f943af033e1'}, page_content='Fresil Patel'),\n",
734
+ " Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': '988847bc72eb2d24bbf243b589f0e1f5'}, page_content='Co-Founder & CMO'),\n",
735
+ " Document(metadata={'category_depth': 1, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '988847bc72eb2d24bbf243b589f0e1f5', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': '1c6a0d5446f88de7f3a64268d3482537'}, page_content='Contact'),\n",
736
+ " Document(metadata={'category_depth': 2, 'link_texts': ['[email protected]'], 'link_urls': ['/cdn-cgi/l/email-protection#4034282528253821342523286e2f2626292329212c00272d21292c6e232f2d'], 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '1c6a0d5446f88de7f3a64268d3482537', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': 'e62503f6dd746e461dd27b76adbc1e69'}, page_content='Email : [email protected]'),\n",
737
+ " Document(metadata={'category_depth': 2, 'link_texts': ['+91 9749525157', '+91 9727226136', '+91 8320372440'], 'link_urls': ['tel:+91 9749525157', 'tel:+91 9727226136', 'tel:+91 8320372440'], 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '1c6a0d5446f88de7f3a64268d3482537', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': '9322425df598a7db36800b11ff6e4341'}, page_content='Phone : +91 9749525157 | +91 9727226136 | +91 8320372440'),\n",
738
+ " Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '1c6a0d5446f88de7f3a64268d3482537', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': '316a26b53e2457c38a24dfd834bb27a3'}, page_content='Head Office :'),\n",
739
+ " Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': '9c234b8193a3c6e2094c1c9b25cab652'}, page_content='211, 2nd floor, Om Shayona Arcade, Gota, Ahmedabad, Gujarat, India'),\n",
740
+ " Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': '67118c836ec07ff0aa780851e2bff109'}, page_content='Click Here'),\n",
741
+ " Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '67118c836ec07ff0aa780851e2bff109', 'url': 'https://thehexatech.com/about/index.html', 'category': 'UncategorizedText', 'element_id': '6ce65a8bee8156d51839d5bb165daccd'}, page_content='&/#10132;'),\n",
742
+ " Document(metadata={'category_depth': 1, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '67118c836ec07ff0aa780851e2bff109', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': 'a7c289d38a16d0061c9cf53ee60d3fe7'}, page_content=\"Have a project in mind? Let's Talk\"),\n",
743
+ " Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'a7c289d38a16d0061c9cf53ee60d3fe7', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': 'fff5820e7c52059c6dd4b80fac377378'}, page_content='Our Location'),\n",
744
+ " Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': 'd8cac8da8b7789dec9d2150a34ad5cc3'}, page_content='Om Shayona Arcade, Gota, Ahmedabad, Gujarat, India, 382481'),\n",
745
+ " Document(metadata={'category_depth': 0, 'link_texts': ['Home', 'About', 'Services', 'Testimonials', 'Quote'], 'link_urls': ['/#page1', '/about/index.html', '/#page1Box2', '/#testimonials', '/quote/index.html'], 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': '64b0a55bd9eba5ca20f6c33fed27166f'}, page_content='Home About Services Testimonials Quote'),\n",
746
+ " Document(metadata={'category_depth': 0, 'link_texts': ['LinkedIn', 'Instagram', 'Email', 'Phone'], 'link_urls': ['https://www.linkedin.com/company/the-hexatech/about/', 'https://www.instagram.com/thehexatech/', '/cdn-cgi/l/email-protection#9de9f5f8f5f8e5fce9f8fef5b3f2fbfbf4fef4fcf1ddfaf0fcf4f1b3fef2f0', 'tel:+91 9749525157'], 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': 'a9207d75c7ac89e9d67d65cffa2f9807'}, page_content='LinkedIn Instagram Email Phone'),\n",
747
+ " Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'a9207d75c7ac89e9d67d65cffa2f9807', 'url': 'https://thehexatech.com/about/index.html', 'category': 'NarrativeText', 'element_id': '9c6e77a08330da21afcbe6a9ce68f244'}, page_content='© 2024, The Hexatech. All Rights Reserved.'),\n",
748
+ " Document(metadata={'category_depth': 1, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com/quote/index.html', 'category': 'Title', 'element_id': '3c242d6309c71ef20ac0a761c047c5ea'}, page_content='Get A Quote'),\n",
749
+ " Document(metadata={'category_depth': 1, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com/quote/index.html', 'category': 'Title', 'element_id': '3c8adb6c12d201c5797ac00b7334ebb4'}, page_content='Thank you! We will get back to you soon.'),\n",
750
+ " Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com/quote/index.html', 'category': 'Title', 'element_id': 'd0bb7211fcefbbc5691caeb93cc8d56c'}, page_content='Click Here'),\n",
751
+ " Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'd0bb7211fcefbbc5691caeb93cc8d56c', 'url': 'https://thehexatech.com/quote/index.html', 'category': 'UncategorizedText', 'element_id': 'e7eaf82161f1ec7d7ac4937791c0e7d7'}, page_content='&/#10132;'),\n",
752
+ " Document(metadata={'category_depth': 1, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'd0bb7211fcefbbc5691caeb93cc8d56c', 'url': 'https://thehexatech.com/quote/index.html', 'category': 'Title', 'element_id': '0794700a012e87ec297b94d01edee052'}, page_content=\"Have a project in mind? Let's Talk\"),\n",
753
+ " Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '0794700a012e87ec297b94d01edee052', 'url': 'https://thehexatech.com/quote/index.html', 'category': 'Title', 'element_id': 'd23be126165c0fff800694f8f8bac6ff'}, page_content='Our Location'),\n",
754
+ " Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com/quote/index.html', 'category': 'Title', 'element_id': 'ba20a3788d99b79398694e2be15be0a4'}, page_content='Om Shayona Arcade, Gota, Ahmedabad, Gujarat, India, 382481'),\n",
755
+ " Document(metadata={'category_depth': 0, 'link_texts': ['Home', 'About', 'Services', 'Testimonials', 'Quote'], 'link_urls': ['/#page1', '/about/index.html', '/#page1Box2', '/#testimonials', '/quote/index.html'], 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com/quote/index.html', 'category': 'Title', 'element_id': '16c0f77bbd46a85c591652e06af0595b'}, page_content='Home About Services Testimonials Quote'),\n",
756
+ " Document(metadata={'category_depth': 0, 'link_texts': ['LinkedIn', 'Instagram', 'Email', 'Phone'], 'link_urls': ['https://www.linkedin.com/company/the-hexatech/about/', 'https://www.instagram.com/thehexatech/', '/cdn-cgi/l/email-protection#b0c4d8d5d8d5c8d1c4d5d3d89edfd6d6d9d3d9d1dcf0d7ddd1d9dc9ed3dfdd', 'tel:+91 9749525157'], 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com/quote/index.html', 'category': 'Title', 'element_id': 'f8bff147020fc1cb4df51c14fb152bcb'}, page_content='LinkedIn Instagram Email Phone'),\n",
757
+ " Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'f8bff147020fc1cb4df51c14fb152bcb', 'url': 'https://thehexatech.com/quote/index.html', 'category': 'NarrativeText', 'element_id': '8827d6da3cae3e9982671539a167ecd2'}, page_content='© 2024, The Hexatech. All Rights Reserved.')]"
758
+ ]
759
+ },
760
+ "execution_count": 19,
761
+ "metadata": {},
762
+ "output_type": "execute_result"
763
+ }
764
+ ],
765
+ "source": [
766
+ "dataset"
767
+ ]
768
+ }
769
+ ],
770
+ "metadata": {
771
+ "kernelspec": {
772
+ "display_name": "venv1",
773
+ "language": "python",
774
+ "name": "venv1"
775
+ },
776
+ "language_info": {
777
+ "codemirror_mode": {
778
+ "name": "ipython",
779
+ "version": 3
780
+ },
781
+ "file_extension": ".py",
782
+ "mimetype": "text/x-python",
783
+ "name": "python",
784
+ "nbconvert_exporter": "python",
785
+ "pygments_lexer": "ipython3",
786
+ "version": "3.12.2"
787
+ }
788
+ },
789
+ "nbformat": 4,
790
+ "nbformat_minor": 2
791
+ }