Spaces:
Running
Running
Upload 8 files
Browse files- Dockerfile +12 -12
- chatbot.py +2 -2
- chatbotmemory.py +91 -0
- dataset.txt +66 -75
- requirements.txt +11 -11
- scrapWebpage.py +39 -0
- test.ipynb +791 -0
Dockerfile
CHANGED
|
@@ -1,13 +1,13 @@
|
|
| 1 |
-
FROM python:
|
| 2 |
-
|
| 3 |
-
WORKDIR /
|
| 4 |
-
|
| 5 |
-
COPY ./requirements.txt .
|
| 6 |
-
|
| 7 |
-
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
| 8 |
-
|
| 9 |
-
COPY . .
|
| 10 |
-
|
| 11 |
-
EXPOSE 7860
|
| 12 |
-
|
| 13 |
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
|
|
|
| 1 |
+
FROM python:latest
|
| 2 |
+
|
| 3 |
+
WORKDIR /
|
| 4 |
+
|
| 5 |
+
COPY ./requirements.txt .
|
| 6 |
+
|
| 7 |
+
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
| 8 |
+
|
| 9 |
+
COPY . .
|
| 10 |
+
|
| 11 |
+
EXPOSE 7860
|
| 12 |
+
|
| 13 |
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
chatbot.py
CHANGED
|
@@ -36,10 +36,10 @@ class Chatbot():
|
|
| 36 |
docsearch = PineconeVectorStore.from_existing_index(index_name, embeddings)
|
| 37 |
|
| 38 |
|
| 39 |
-
llm = GoogleGenerativeAI(model="
|
| 40 |
|
| 41 |
template = """
|
| 42 |
-
INSTRUCTION: Act as
|
| 43 |
to a user who wants to get his query solved about The Hexatech. Use the CONTEXT to answer in a helpful manner to the QUESTION. \
|
| 44 |
If you don't know any ANSWER, say you don't know \
|
| 45 |
Always follow general guardrails before generating any response. \
|
|
|
|
| 36 |
docsearch = PineconeVectorStore.from_existing_index(index_name, embeddings)
|
| 37 |
|
| 38 |
|
| 39 |
+
# The Gemini API resolves models by their lowercase, hyphenated model code.
# The human-readable display name ("Gemini 2.5 Flash-Lite Preview 06-17") is
# not a valid identifier and makes the request fail at model resolution.
# NOTE(review): preview model codes get retired; switch to the stable
# "gemini-2.5-flash-lite" code once the preview is no longer served.
llm = GoogleGenerativeAI(model="gemini-2.5-flash-lite-preview-06-17", google_api_key=os.getenv("GEMINI_API_KEY"))
|
| 40 |
|
| 41 |
template = """
|
| 42 |
+
INSTRUCTION: Act as Customer Support chatbot of The Hexatech, an IT Startup that provides Daas \
|
| 43 |
to a user who wants to get his query solved about The Hexatech. Use the CONTEXT to answer in a helpful manner to the QUESTION. \
|
| 44 |
If you don't know any ANSWER, say you don't know \
|
| 45 |
Always follow general guardrails before generating any response. \
|
chatbotmemory.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 2 |
+
from langchain_community.document_loaders import TextLoader
|
| 3 |
+
from langchain.schema.runnable import RunnablePassthrough
|
| 4 |
+
from langchain.schema.output_parser import StrOutputParser
|
| 5 |
+
from langchain_pinecone import PineconeVectorStore
|
| 6 |
+
from langchain.prompts import MessagesPlaceholder, ChatPromptTemplate
|
| 7 |
+
from langchain_google_genai import GoogleGenerativeAI, GoogleGenerativeAIEmbeddings
|
| 8 |
+
from dotenv import load_dotenv, find_dotenv
|
| 9 |
+
import os
|
| 10 |
+
from pinecone import Pinecone, PodSpec
|
| 11 |
+
|
| 12 |
+
load_dotenv(find_dotenv())
|
| 13 |
+
|
| 14 |
+
class ChatbotMemory():
    """Retrieval-augmented chat pipeline that takes chat history into account.

    Every statement in this class body runs once, when the class is defined
    (i.e. when the module is imported): ``dataset.txt`` is loaded and split
    into chunks, the Pinecone index named ``chatbot`` is created and filled
    if it does not exist yet, and the LangChain runnables are assembled.

    The finished pipeline is exposed as ``ChatbotMemory.rag_chain``. It reads
    the keys ``question`` and ``chat_history`` from the mapping it is invoked
    with.
    """

    # Source corpus, split into overlapping chunks for embedding.
    loader = TextLoader('dataset.txt', autodetect_encoding=True)
    documents = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=103)
    docs = text_splitter.split_documents(documents)

    # NOTE(review): the same task_type ("retrieval_query") is configured for
    # the embedder that also indexes the documents below -- confirm intended.
    embeddings = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001",
        task_type="retrieval_query",
        google_api_key=os.getenv("GEMINI_API_KEY"),
    )

    pinecone = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))

    index_name = "chatbot"

    if index_name not in pinecone.list_indexes().names():
        # First run: create the index, then embed and upload the chunks.
        # presumably dimension=768 matches the embedding model's output size
        # -- verify before changing the embedding model.
        pinecone.create_index(
            name=index_name,
            metric="cosine",
            dimension=768,
            spec=PodSpec(environment="gcp-starter"),
        )
        docsearch = PineconeVectorStore.from_documents(docs, embeddings, index_name=index_name)
    else:
        # Index already exists: attach to it without re-uploading anything.
        docsearch = PineconeVectorStore.from_existing_index(index_name, embeddings)

    llm = GoogleGenerativeAI(model="gemini-pro", google_api_key=os.getenv("GEMINI_API_KEY"))

    # The question-rewriting chain is built once here instead of on every
    # request. Adjacent string literals are used (rather than backslash
    # continuations inside a triple-quoted string) so that no source
    # indentation leaks into the prompt text.
    contextualize_q_system_prompt = (
        "Given a chat history and the latest user question "
        "which might reference context in the chat history, formulate a standalone question "
        "which can be understood without the chat history. Do NOT answer the question, "
        "just reformulate it if needed and otherwise return it as is."
    )

    contextualize_q_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", contextualize_q_system_prompt),
            MessagesPlaceholder(variable_name="chat_history"),
            ("human", "{question}"),
        ]
    )

    contextualize_q_chain = contextualize_q_prompt | llm | StrOutputParser()

    def contextualized_question(input: dict):
        """Pick what is fed to the retriever for this request.

        Args:
            input: Mapping holding ``question`` and, optionally,
                ``chat_history``.

        Returns:
            The raw ``question`` string when ``chat_history`` is missing or
            empty; otherwise the shared runnable that rewrites the question
            into a standalone one using the history.
        """
        if not input.get("chat_history"):
            return input["question"]
        # Class-body names are not visible from inside this function, so the
        # prebuilt chain is reached through the class object. This lookup
        # only happens at call time, after the class has been created.
        return ChatbotMemory.contextualize_q_chain

    # Same wording as before; sentences are joined with single spaces so the
    # instruction paragraph stays one line followed by the three fields.
    template = (
        "\n"
        "INSTRUCTION: Act as PRAN, Personal Assistant of Rohan Shaw, this is conversation "
        "to a user who wants to get his query solved about Rohan Shaw, his professional and personal life. Use the CONTEXT to answer in a helpful manner to the QUESTION. "
        "If you don't know any ANSWER, say you don't know "
        "Always follow general guardrails before generating any response. "
        "Always try to keep the conversation in context to Rohan Shaw. Keep your replies short "
        "compassionate and informative. "
        "Give the answer from the CONTEXT "
        "You should help user to get his query solved and also try to increase engagement and promoting Rohan Shaw."
        "\n"
        "CONTEXT: {context}\n"
        "QUESTION: {question}\n"
        "ANSWER:\n"
    )

    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", template),
            MessagesPlaceholder(variable_name="chat_history"),
            ("human", "{question}"),
        ]
    )

    # Retrieve context for the (possibly rewritten) question, fill the answer
    # prompt with it, and send the result to the LLM.
    rag_chain = (
        RunnablePassthrough.assign(
            context=contextualized_question | docsearch.as_retriever()
        )
        | prompt
        | llm
    )
|
dataset.txt
CHANGED
|
@@ -1,91 +1,74 @@
|
|
| 1 |
-
|
| 2 |
-
Hexa Core Services For Your Business.
|
| 3 |
-
we build success stories for you, get your business a lucky touch
|
| 4 |
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
Viridiv
|
| 10 |
Healscure
|
| 11 |
CodeLens
|
| 12 |
|
| 13 |
-
Be the
|
| 14 |
-
|
| 15 |
-
Let
|
| 16 |
-
|
| 17 |
-
What
|
| 18 |
-
At
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
Raj Shaw
|
| 38 |
-
|
| 39 |
-
|
|
|
|
|
|
|
| 40 |
Rishi Pipaliya
|
| 41 |
-
Rose & Fern
|
| 42 |
-
|
|
|
|
|
|
|
| 43 |
Jeet Ghosh
|
| 44 |
-
LumaticAI
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
01
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
We get in touch through meet
|
| 53 |
-
04
|
| 54 |
-
Deal is finalized
|
| 55 |
-
05
|
| 56 |
-
Project is delivered after payment
|
| 57 |
|
| 58 |
Have a project in mind? Let's Talk
|
| 59 |
|
| 60 |
-
Our Location
|
| 61 |
Om Shayona Arcade, Gota, Ahmedabad, Gujarat, India, 382481
|
| 62 |
|
| 63 |
-
Our Company
|
| 64 |
-
At The Hexatech, we believe that every great business starts with a bold idea. We specialize in providing end-to-end business solutions that help entrepreneurs and businesses bring their visions to life. From designing unique digital experiences, to developing state-of-the-art software, to implementing cutting-edge AI and machine learning systems — we do it all.
|
| 65 |
-
Our approach is holistic. We not only build and deploy, but also support businesses throughout their entire lifecycle. Whether you are launching a new product, refining an existing one, or optimizing for scalability, we ensure that every step is handled with care and precision. Our dedicated team of experts will work with you to innovate, develop, and grow your brand to achieve sustained market success.
|
| 66 |
-
The Hexatech is your trusted partner for turning digital challenges into opportunities, combining creativity, technology, and business acumen to help you stay ahead of the competition. With our tailored solutions, we aim to transform your business and ensure it thrives in today’s fast-evolving digital world.
|
| 67 |
-
|
| 68 |
-
Meet Our Founders
|
| 69 |
-
Rohan Shaw
|
| 70 |
-
Co-Founder & CEO
|
| 71 |
-
https://instagram.com/the_rohanshaw
|
| 72 |
-
https://linkedin.com/in/rohan-shaw-rs
|
| 73 |
-
https://x.com/heyMeRohan
|
| 74 |
-
Sujal Merani
|
| 75 |
-
Co-Founder & CSO
|
| 76 |
-
https://instagram.com/sujal__merani
|
| 77 |
-
https://linkedin.com/in/sujal-merani-150316329
|
| 78 |
-
https://x.com/msujal_21
|
| 79 |
-
Fresil Patel
|
| 80 |
-
Co-Founder & CMO
|
| 81 |
-
https://instagram.com/fresil_patel
|
| 82 |
-
https://linkedin.com/in/fresilpatel
|
| 83 |
-
https://x.com/PatelFresil
|
| 84 |
-
|
| 85 |
Contact
|
| 86 |
-
Email :
|
| 87 |
-
Business : connect@thehexatech.com
|
| 88 |
-
Phone : +91 9749525157 | +91 9727226136 | +91 8320372440
|
| 89 |
Head Office :
|
| 90 |
211, 2nd floor, Om Shayona Arcade, Gota, Ahmedabad, Gujarat, India
|
| 91 |
|
|
@@ -96,4 +79,12 @@ website : https://thehexatech.com
|
|
| 96 |
LinkedIn: https://www.linkedin.com/company/the-hexatech/about/
|
| 97 |
Instagram: https://www.instagram.com/thehexatech/
|
| 98 |
|
| 99 |
-
© 2024, The Hexatech. All Rights Reserved.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Tagline : Data. Decision. Domination
|
|
|
|
|
|
|
| 2 |
|
| 3 |
+
- Data as a Service For Your Business.
|
| 4 |
+
- We build success stories for you; get your business a lucky touch
|
| 5 |
+
|
| 6 |
+
- Clients:
|
| 7 |
Viridiv
|
| 8 |
Healscure
|
| 9 |
CodeLens
|
| 10 |
|
| 11 |
+
- Be the Achiever of Achievers
|
| 12 |
+
Transform raw web data into actionable intelligence with The Hexatech, your premier partner for Data-as-a-Service solutions. We handle the complexities of web scraping, data extraction, and validation, delivering clean, structured data and data APIs directly to you. Focus on insights, not infrastructure.
|
| 13 |
+
Let's power your decisions with precise data.
|
| 14 |
+
|
| 15 |
+
- What We Offer: Your Data, Streamlined
|
| 16 |
+
At TheHexaTech, we specialize in delivering precise and scalable Data-as-a-Service solutions. We extract, process, and deliver the web data
|
| 17 |
+
you need to drive critical business decisions. Our expert team handles every complexity, from sophisticated web and app scraping to AI-powered data processing, ensuring you receive clean, actionable insights without the hassle.
|
| 18 |
+
|
| 19 |
+
- Web Scraping & Data Extraction
|
| 20 |
+
Get accurate, large-scale data from any public website. We handle dynamic content, anti-bot measures, and complex structures to deliver the
|
| 21 |
+
exact data points you need for market research, competitive analysis, and more.
|
| 22 |
+
|
| 23 |
+
- App Scraping & Mobile Data
|
| 24 |
+
Access valuable data from mobile applications. Our solutions extract information from public app listings, reviews, and other available app-based data sources, providing unique insights into mobile trends and user sentiment.
|
| 25 |
+
|
| 26 |
+
- Custom Scraping Solutions
|
| 27 |
+
Have unique data needs? We design and implement bespoke scraping solutions tailored to your specific requirements, ensuring you get precisely the data you're looking for, no matter the complexity or source.
|
| 28 |
+
|
| 29 |
+
- Scraping APIs & Real-time Feeds
|
| 30 |
+
Integrate directly with our robust APIs for real-time, on-demand data access. Set up continuous data feeds to keep your systems updated with the freshest information for dynamic pricing, instant alerts, and live monitoring.
|
| 31 |
+
|
| 32 |
+
- AI/ML Powered Data Processing
|
| 33 |
+
Beyond extraction, we leverage AI and Machine Learning to process, clean, and enrich your data. From sentiment analysis and categorization to deduplication and natural language processing, we turn raw data into intelligent insights.
|
| 34 |
+
|
| 35 |
+
- AI/ML Solutions
|
| 36 |
+
Unlock deeper insights with custom Artificial Intelligence and Machine Learning solutions. From predictive analytics and trend forecasting to sentiment analysis and intelligent automation, we build models that leverage your data to drive innovation.
|
| 37 |
+
|
| 38 |
+
- About TheHexaTech
|
| 39 |
+
At The Hexatech, we are more than just a service provider — we are your dedicated partner in unlocking the true power of data. With a steadfast commitment to accuracy, scalability, and innovation, we deliver comprehensive Data-as-a-Service solutions designed to empower businesses in the digital age. Founded with the mission to simplify complex data acquisition and accelerate informed decision-making, we specialize in transforming vast amounts of raw web information into clean, structured, and actionable intelligence. Whether you're a startup seeking market insights or an established enterprise aiming for competitive advantage, we tailor our DaaS solutions to meet your unique and evolving data needs. Our team of expert data engineers, analysts, and AI/ML specialists works diligently to ensure precise data extraction, rigorous validation, and seamless delivery. At TheHexaTech, we believe in a client-centric approach, diving deep into your specific requirements to provide data solutions that are not only reliable but also directly impactful to your strategic goals. We're here to help you Extract, Analyze,
|
| 40 |
+
Excel – because your data-driven success is our ultimate mission.
|
| 41 |
+
|
| 42 |
+
- What Our Clients Say About Our Data Solutions
|
| 43 |
+
"The data quality and the speed of delivery from TheHexaTech are simply unparalleled. Their web scraping services have provided us with invaluable market insights, giving us a significant competitive edge."
|
| 44 |
+
Moodmeter Logo
|
| 45 |
Raj Shaw
|
| 46 |
+
Market Intelligence Lead, Moodmeter
|
| 47 |
+
|
| 48 |
+
"TheHexaTech has been a game-changer for our data strategy. Their custom scraping solutions and AI/ML expertise transformed how we gather and analyze information. Highly recommend them for any data needs!"
|
| 49 |
+
Rose & Fern Logo
|
| 50 |
Rishi Pipaliya
|
| 51 |
+
Head of Product, Rose & Fern
|
| 52 |
+
|
| 53 |
+
"The Hexatech team is incredibly professional and responsive. They not only met our complex data extraction needs but also exceeded our expectations with their advanced data validation and seamless delivery."
|
| 54 |
+
LumaticAI Logo
|
| 55 |
Jeet Ghosh
|
| 56 |
+
VP of Operations, LumaticAI
|
| 57 |
+
|
| 58 |
+
- Our Data Solutions Process
|
| 59 |
+
01. Get in touch to discuss your specific data requirements, target sources, and desired output formats. We'll provide a custom quote.
|
| 60 |
+
02. Our experts design a tailored data extraction solution, configuring crawlers and ensuring robust anti-blocking measures.
|
| 61 |
+
03. We begin the high-volume data collection, applying rigorous quality checks and structuring the data for accuracy and usability.
|
| 62 |
+
04. Receive your clean, structured data in your preferred format (API, CSV, JSON, etc.) directly to your systems or storage.
|
| 63 |
+
05. We provide continuous monitoring, maintenance, and support to ensure your data feeds remain consistent and reliable.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
|
| 65 |
Have a project in mind? Let's Talk
|
| 66 |
|
| 67 |
+
- Our Location
|
| 68 |
Om Shayona Arcade, Gota, Ahmedabad, Gujarat, India, 382481
|
| 69 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
Contact
|
| 71 |
+
Email : thehexatech.official@gmail.com
|
|
|
|
|
|
|
| 72 |
Head Office :
|
| 73 |
211, 2nd floor, Om Shayona Arcade, Gota, Ahmedabad, Gujarat, India
|
| 74 |
|
|
|
|
| 79 |
LinkedIn: https://www.linkedin.com/company/the-hexatech/about/
|
| 80 |
Instagram: https://www.instagram.com/thehexatech/
|
| 81 |
|
| 82 |
+
© 2024, The Hexatech. All Rights Reserved.
|
| 83 |
+
|
| 84 |
+
- Our Company
|
| 85 |
+
At TheHexaTech, we believe that precise, actionable data is the cornerstone of every successful modern business. We specialize in providing
|
| 86 |
+
comprehensive Data-as-a-Service (DaaS) solutions that empower entrepreneurs and established enterprises to harness the vast potential of web intelligence. From sophisticated web and app scraping to advanced AI and machine learning for data processing and insights – we manage the
|
| 87 |
+
entire data lifecycle for you.
|
| 88 |
+
Our approach is entirely client-focused. We not only extract and deliver high-quality data, but also serve as your strategic partner in transforming raw information into a competitive advantage. Whether you are seeking real-time market trends, competitive pricing intelligence, lead generation, or custom data sets for analytical models, we ensure every data point is accurate, reliable, and tailored to your specific needs.
|
| 89 |
+
TheHexaTech is your trusted ally for navigating the complexities of big data, turning digital noise into clear opportunities. Our dedicated
|
| 90 |
+
team of data experts combines cutting-edge technology with deep business acumen to help you stay ahead in today’s data-intensive world. With our managed DaaS solutions, we aim to streamline your data acquisition process, allowing you to focus on innovation and growth.
|
requirements.txt
CHANGED
|
@@ -1,11 +1,11 @@
|
|
| 1 |
-
langchain
|
| 2 |
-
langchain-community
|
| 3 |
-
langchain-core
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
langchain_google_genai
|
| 8 |
-
langchain-pinecone
|
| 9 |
-
chardet
|
| 10 |
-
uvicorn
|
| 11 |
-
|
|
|
|
| 1 |
+
langchain
|
| 2 |
+
langchain-community
|
| 3 |
+
langchain-core
|
| 4 |
+
langchain-unstructured
|
| 5 |
+
pinecone-client
|
| 6 |
+
python-dotenv
|
| 7 |
+
langchain_google_genai
|
| 8 |
+
langchain-pinecone
|
| 9 |
+
chardet
|
| 10 |
+
uvicorn
|
| 11 |
+
fastapi
|
scrapWebpage.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
from bs4 import BeautifulSoup
|
| 3 |
+
|
| 4 |
+
# def load_from_website(url):
|
| 5 |
+
# response = requests.get(url)
|
| 6 |
+
# soup = BeautifulSoup(response.content, 'html.parser')
|
| 7 |
+
# text = soup.get_text(separator="\n")
|
| 8 |
+
# return [text]
|
| 9 |
+
|
| 10 |
+
# print(load_from_website("https://thehexatech.com"))
|
| 11 |
+
# print()
|
| 12 |
+
# print(load_from_website("https://thehexatech.com/about/index.html"))
|
| 13 |
+
# print()
|
| 14 |
+
# print(load_from_website("https://thehexatech.com/quote/index.html"))
|
| 15 |
+
# print()
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
import asyncio
|
| 19 |
+
from langchain_unstructured import UnstructuredLoader
|
| 20 |
+
|
| 21 |
+
page_url = "https://thehexatech.com/about"
|
| 22 |
+
loader = UnstructuredLoader(web_url=page_url)
|
| 23 |
+
|
| 24 |
+
docs = []
|
| 25 |
+
|
| 26 |
+
async def get_data():
    """Append every document produced by the module-level ``loader`` to ``docs``.

    Returns:
        The module-level ``docs`` list, after the new documents were appended.
    """
    # No ``global`` statement is needed: ``docs`` is only mutated in place,
    # never rebound.
    async for doc in loader.alazy_load():
        docs.append(doc)
    return docs
|
| 30 |
+
|
| 31 |
+
async def main():
    """Run ``get_data`` and print the text of each collected document."""
    await get_data()
    for document in docs:
        print(document.page_content)
|
| 36 |
+
|
| 37 |
+
asyncio.run(main())
|
| 38 |
+
|
| 39 |
+
|
test.ipynb
ADDED
|
@@ -0,0 +1,791 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [],
|
| 8 |
+
"source": [
|
| 9 |
+
"import asyncio\n",
|
| 10 |
+
"from langchain_unstructured import UnstructuredLoader"
|
| 11 |
+
]
|
| 12 |
+
},
|
| 13 |
+
{
|
| 14 |
+
"cell_type": "code",
|
| 15 |
+
"execution_count": 6,
|
| 16 |
+
"metadata": {},
|
| 17 |
+
"outputs": [],
|
| 18 |
+
"source": [
|
| 19 |
+
"page_url = [\"https://thehexatech.com\", \"https://thehexatech.com/about/index.html\", \"https://thehexatech.com/quote/index.html\"] "
|
| 20 |
+
]
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"cell_type": "code",
|
| 24 |
+
"execution_count": 16,
|
| 25 |
+
"metadata": {},
|
| 26 |
+
"outputs": [],
|
| 27 |
+
"source": [
|
| 28 |
+
"async def get_data():\n",
|
| 29 |
+
" docs = []\n",
|
| 30 |
+
" for url in page_url:\n",
|
| 31 |
+
" loader = UnstructuredLoader(web_url=url)\n",
|
| 32 |
+
" async for doc in loader.alazy_load():\n",
|
| 33 |
+
" docs.append(doc)\n",
|
| 34 |
+
" return docs"
|
| 35 |
+
]
|
| 36 |
+
},
|
| 37 |
+
{
|
| 38 |
+
"cell_type": "code",
|
| 39 |
+
"execution_count": 17,
|
| 40 |
+
"metadata": {},
|
| 41 |
+
"outputs": [],
|
| 42 |
+
"source": [
|
| 43 |
+
"try:\n",
|
| 44 |
+
" dataset = await get_data()\n",
|
| 45 |
+
"except RuntimeError:\n",
|
| 46 |
+
" print(\"This environment may not support 'await' outside of an async context.\")"
|
| 47 |
+
]
|
| 48 |
+
},
|
| 49 |
+
{
|
| 50 |
+
"cell_type": "code",
|
| 51 |
+
"execution_count": 33,
|
| 52 |
+
"metadata": {},
|
| 53 |
+
"outputs": [
|
| 54 |
+
{
|
| 55 |
+
"name": "stdout",
|
| 56 |
+
"output_type": "stream",
|
| 57 |
+
"text": [
|
| 58 |
+
"category_depth\n",
|
| 59 |
+
"languages\n",
|
| 60 |
+
"filetype\n",
|
| 61 |
+
"url\n",
|
| 62 |
+
"category\n",
|
| 63 |
+
"element_id\n",
|
| 64 |
+
"category_depth\n",
|
| 65 |
+
"languages\n",
|
| 66 |
+
"filetype\n",
|
| 67 |
+
"url\n",
|
| 68 |
+
"category\n",
|
| 69 |
+
"element_id\n",
|
| 70 |
+
"languages\n",
|
| 71 |
+
"filetype\n",
|
| 72 |
+
"parent_id\n",
|
| 73 |
+
"url\n",
|
| 74 |
+
"category\n",
|
| 75 |
+
"element_id\n",
|
| 76 |
+
"category_depth\n",
|
| 77 |
+
"languages\n",
|
| 78 |
+
"filetype\n",
|
| 79 |
+
"url\n",
|
| 80 |
+
"category\n",
|
| 81 |
+
"element_id\n",
|
| 82 |
+
"category_depth\n",
|
| 83 |
+
"languages\n",
|
| 84 |
+
"filetype\n",
|
| 85 |
+
"url\n",
|
| 86 |
+
"category\n",
|
| 87 |
+
"element_id\n",
|
| 88 |
+
"category_depth\n",
|
| 89 |
+
"languages\n",
|
| 90 |
+
"filetype\n",
|
| 91 |
+
"url\n",
|
| 92 |
+
"category\n",
|
| 93 |
+
"element_id\n",
|
| 94 |
+
"category_depth\n",
|
| 95 |
+
"languages\n",
|
| 96 |
+
"filetype\n",
|
| 97 |
+
"url\n",
|
| 98 |
+
"category\n",
|
| 99 |
+
"element_id\n",
|
| 100 |
+
"category_depth\n",
|
| 101 |
+
"languages\n",
|
| 102 |
+
"filetype\n",
|
| 103 |
+
"url\n",
|
| 104 |
+
"category\n",
|
| 105 |
+
"element_id\n",
|
| 106 |
+
"category_depth\n",
|
| 107 |
+
"languages\n",
|
| 108 |
+
"filetype\n",
|
| 109 |
+
"url\n",
|
| 110 |
+
"category\n",
|
| 111 |
+
"element_id\n",
|
| 112 |
+
"category_depth\n",
|
| 113 |
+
"languages\n",
|
| 114 |
+
"filetype\n",
|
| 115 |
+
"parent_id\n",
|
| 116 |
+
"url\n",
|
| 117 |
+
"category\n",
|
| 118 |
+
"element_id\n",
|
| 119 |
+
"languages\n",
|
| 120 |
+
"filetype\n",
|
| 121 |
+
"parent_id\n",
|
| 122 |
+
"url\n",
|
| 123 |
+
"category\n",
|
| 124 |
+
"element_id\n",
|
| 125 |
+
"languages\n",
|
| 126 |
+
"filetype\n",
|
| 127 |
+
"parent_id\n",
|
| 128 |
+
"url\n",
|
| 129 |
+
"category\n",
|
| 130 |
+
"element_id\n",
|
| 131 |
+
"category_depth\n",
|
| 132 |
+
"languages\n",
|
| 133 |
+
"filetype\n",
|
| 134 |
+
"parent_id\n",
|
| 135 |
+
"url\n",
|
| 136 |
+
"category\n",
|
| 137 |
+
"element_id\n",
|
| 138 |
+
"languages\n",
|
| 139 |
+
"filetype\n",
|
| 140 |
+
"parent_id\n",
|
| 141 |
+
"url\n",
|
| 142 |
+
"category\n",
|
| 143 |
+
"element_id\n",
|
| 144 |
+
"category_depth\n",
|
| 145 |
+
"languages\n",
|
| 146 |
+
"filetype\n",
|
| 147 |
+
"parent_id\n",
|
| 148 |
+
"url\n",
|
| 149 |
+
"category\n",
|
| 150 |
+
"element_id\n",
|
| 151 |
+
"languages\n",
|
| 152 |
+
"filetype\n",
|
| 153 |
+
"parent_id\n",
|
| 154 |
+
"url\n",
|
| 155 |
+
"category\n",
|
| 156 |
+
"element_id\n",
|
| 157 |
+
"category_depth\n",
|
| 158 |
+
"languages\n",
|
| 159 |
+
"filetype\n",
|
| 160 |
+
"parent_id\n",
|
| 161 |
+
"url\n",
|
| 162 |
+
"category\n",
|
| 163 |
+
"element_id\n",
|
| 164 |
+
"languages\n",
|
| 165 |
+
"filetype\n",
|
| 166 |
+
"parent_id\n",
|
| 167 |
+
"url\n",
|
| 168 |
+
"category\n",
|
| 169 |
+
"element_id\n",
|
| 170 |
+
"category_depth\n",
|
| 171 |
+
"languages\n",
|
| 172 |
+
"filetype\n",
|
| 173 |
+
"parent_id\n",
|
| 174 |
+
"url\n",
|
| 175 |
+
"category\n",
|
| 176 |
+
"element_id\n",
|
| 177 |
+
"languages\n",
|
| 178 |
+
"filetype\n",
|
| 179 |
+
"parent_id\n",
|
| 180 |
+
"url\n",
|
| 181 |
+
"category\n",
|
| 182 |
+
"element_id\n",
|
| 183 |
+
"category_depth\n",
|
| 184 |
+
"languages\n",
|
| 185 |
+
"filetype\n",
|
| 186 |
+
"parent_id\n",
|
| 187 |
+
"url\n",
|
| 188 |
+
"category\n",
|
| 189 |
+
"element_id\n",
|
| 190 |
+
"languages\n",
|
| 191 |
+
"filetype\n",
|
| 192 |
+
"parent_id\n",
|
| 193 |
+
"url\n",
|
| 194 |
+
"category\n",
|
| 195 |
+
"element_id\n",
|
| 196 |
+
"category_depth\n",
|
| 197 |
+
"languages\n",
|
| 198 |
+
"filetype\n",
|
| 199 |
+
"parent_id\n",
|
| 200 |
+
"url\n",
|
| 201 |
+
"category\n",
|
| 202 |
+
"element_id\n",
|
| 203 |
+
"languages\n",
|
| 204 |
+
"filetype\n",
|
| 205 |
+
"parent_id\n",
|
| 206 |
+
"url\n",
|
| 207 |
+
"category\n",
|
| 208 |
+
"element_id\n",
|
| 209 |
+
"category_depth\n",
|
| 210 |
+
"languages\n",
|
| 211 |
+
"filetype\n",
|
| 212 |
+
"parent_id\n",
|
| 213 |
+
"url\n",
|
| 214 |
+
"category\n",
|
| 215 |
+
"element_id\n",
|
| 216 |
+
"languages\n",
|
| 217 |
+
"filetype\n",
|
| 218 |
+
"parent_id\n",
|
| 219 |
+
"url\n",
|
| 220 |
+
"category\n",
|
| 221 |
+
"element_id\n",
|
| 222 |
+
"category_depth\n",
|
| 223 |
+
"languages\n",
|
| 224 |
+
"filetype\n",
|
| 225 |
+
"parent_id\n",
|
| 226 |
+
"url\n",
|
| 227 |
+
"category\n",
|
| 228 |
+
"element_id\n",
|
| 229 |
+
"languages\n",
|
| 230 |
+
"filetype\n",
|
| 231 |
+
"parent_id\n",
|
| 232 |
+
"url\n",
|
| 233 |
+
"category\n",
|
| 234 |
+
"element_id\n",
|
| 235 |
+
"category_depth\n",
|
| 236 |
+
"languages\n",
|
| 237 |
+
"filetype\n",
|
| 238 |
+
"parent_id\n",
|
| 239 |
+
"url\n",
|
| 240 |
+
"category\n",
|
| 241 |
+
"element_id\n",
|
| 242 |
+
"category_depth\n",
|
| 243 |
+
"languages\n",
|
| 244 |
+
"filetype\n",
|
| 245 |
+
"parent_id\n",
|
| 246 |
+
"url\n",
|
| 247 |
+
"category\n",
|
| 248 |
+
"element_id\n",
|
| 249 |
+
"category_depth\n",
|
| 250 |
+
"languages\n",
|
| 251 |
+
"filetype\n",
|
| 252 |
+
"parent_id\n",
|
| 253 |
+
"url\n",
|
| 254 |
+
"category\n",
|
| 255 |
+
"element_id\n",
|
| 256 |
+
"category_depth\n",
|
| 257 |
+
"languages\n",
|
| 258 |
+
"filetype\n",
|
| 259 |
+
"url\n",
|
| 260 |
+
"category\n",
|
| 261 |
+
"element_id\n",
|
| 262 |
+
"category_depth\n",
|
| 263 |
+
"languages\n",
|
| 264 |
+
"filetype\n",
|
| 265 |
+
"parent_id\n",
|
| 266 |
+
"url\n",
|
| 267 |
+
"category\n",
|
| 268 |
+
"element_id\n",
|
| 269 |
+
"category_depth\n",
|
| 270 |
+
"languages\n",
|
| 271 |
+
"filetype\n",
|
| 272 |
+
"parent_id\n",
|
| 273 |
+
"url\n",
|
| 274 |
+
"category\n",
|
| 275 |
+
"element_id\n",
|
| 276 |
+
"category_depth\n",
|
| 277 |
+
"languages\n",
|
| 278 |
+
"filetype\n",
|
| 279 |
+
"url\n",
|
| 280 |
+
"category\n",
|
| 281 |
+
"element_id\n",
|
| 282 |
+
"category_depth\n",
|
| 283 |
+
"languages\n",
|
| 284 |
+
"filetype\n",
|
| 285 |
+
"parent_id\n",
|
| 286 |
+
"url\n",
|
| 287 |
+
"category\n",
|
| 288 |
+
"element_id\n",
|
| 289 |
+
"category_depth\n",
|
| 290 |
+
"languages\n",
|
| 291 |
+
"filetype\n",
|
| 292 |
+
"parent_id\n",
|
| 293 |
+
"url\n",
|
| 294 |
+
"category\n",
|
| 295 |
+
"element_id\n",
|
| 296 |
+
"category_depth\n",
|
| 297 |
+
"languages\n",
|
| 298 |
+
"filetype\n",
|
| 299 |
+
"url\n",
|
| 300 |
+
"category\n",
|
| 301 |
+
"element_id\n",
|
| 302 |
+
"category_depth\n",
|
| 303 |
+
"languages\n",
|
| 304 |
+
"filetype\n",
|
| 305 |
+
"parent_id\n",
|
| 306 |
+
"url\n",
|
| 307 |
+
"category\n",
|
| 308 |
+
"element_id\n",
|
| 309 |
+
"category_depth\n",
|
| 310 |
+
"languages\n",
|
| 311 |
+
"filetype\n",
|
| 312 |
+
"parent_id\n",
|
| 313 |
+
"url\n",
|
| 314 |
+
"category\n",
|
| 315 |
+
"element_id\n",
|
| 316 |
+
"category_depth\n",
|
| 317 |
+
"languages\n",
|
| 318 |
+
"filetype\n",
|
| 319 |
+
"url\n",
|
| 320 |
+
"category\n",
|
| 321 |
+
"element_id\n",
|
| 322 |
+
"category_depth\n",
|
| 323 |
+
"languages\n",
|
| 324 |
+
"filetype\n",
|
| 325 |
+
"parent_id\n",
|
| 326 |
+
"url\n",
|
| 327 |
+
"category\n",
|
| 328 |
+
"element_id\n",
|
| 329 |
+
"category_depth\n",
|
| 330 |
+
"languages\n",
|
| 331 |
+
"filetype\n",
|
| 332 |
+
"url\n",
|
| 333 |
+
"category\n",
|
| 334 |
+
"element_id\n",
|
| 335 |
+
"category_depth\n",
|
| 336 |
+
"languages\n",
|
| 337 |
+
"filetype\n",
|
| 338 |
+
"parent_id\n",
|
| 339 |
+
"url\n",
|
| 340 |
+
"category\n",
|
| 341 |
+
"element_id\n",
|
| 342 |
+
"languages\n",
|
| 343 |
+
"filetype\n",
|
| 344 |
+
"parent_id\n",
|
| 345 |
+
"url\n",
|
| 346 |
+
"category\n",
|
| 347 |
+
"element_id\n",
|
| 348 |
+
"category_depth\n",
|
| 349 |
+
"languages\n",
|
| 350 |
+
"filetype\n",
|
| 351 |
+
"parent_id\n",
|
| 352 |
+
"url\n",
|
| 353 |
+
"category\n",
|
| 354 |
+
"element_id\n",
|
| 355 |
+
"languages\n",
|
| 356 |
+
"filetype\n",
|
| 357 |
+
"parent_id\n",
|
| 358 |
+
"url\n",
|
| 359 |
+
"category\n",
|
| 360 |
+
"element_id\n",
|
| 361 |
+
"category_depth\n",
|
| 362 |
+
"languages\n",
|
| 363 |
+
"filetype\n",
|
| 364 |
+
"parent_id\n",
|
| 365 |
+
"url\n",
|
| 366 |
+
"category\n",
|
| 367 |
+
"element_id\n",
|
| 368 |
+
"languages\n",
|
| 369 |
+
"filetype\n",
|
| 370 |
+
"parent_id\n",
|
| 371 |
+
"url\n",
|
| 372 |
+
"category\n",
|
| 373 |
+
"element_id\n",
|
| 374 |
+
"category_depth\n",
|
| 375 |
+
"languages\n",
|
| 376 |
+
"filetype\n",
|
| 377 |
+
"url\n",
|
| 378 |
+
"category\n",
|
| 379 |
+
"element_id\n",
|
| 380 |
+
"category_depth\n",
|
| 381 |
+
"languages\n",
|
| 382 |
+
"filetype\n",
|
| 383 |
+
"parent_id\n",
|
| 384 |
+
"url\n",
|
| 385 |
+
"category\n",
|
| 386 |
+
"element_id\n",
|
| 387 |
+
"category_depth\n",
|
| 388 |
+
"languages\n",
|
| 389 |
+
"filetype\n",
|
| 390 |
+
"parent_id\n",
|
| 391 |
+
"url\n",
|
| 392 |
+
"category\n",
|
| 393 |
+
"element_id\n",
|
| 394 |
+
"category_depth\n",
|
| 395 |
+
"languages\n",
|
| 396 |
+
"filetype\n",
|
| 397 |
+
"url\n",
|
| 398 |
+
"category\n",
|
| 399 |
+
"element_id\n",
|
| 400 |
+
"category_depth\n",
|
| 401 |
+
"link_texts\n",
|
| 402 |
+
"link_urls\n",
|
| 403 |
+
"languages\n",
|
| 404 |
+
"filetype\n",
|
| 405 |
+
"url\n",
|
| 406 |
+
"category\n",
|
| 407 |
+
"element_id\n",
|
| 408 |
+
"category_depth\n",
|
| 409 |
+
"link_texts\n",
|
| 410 |
+
"link_urls\n",
|
| 411 |
+
"languages\n",
|
| 412 |
+
"filetype\n",
|
| 413 |
+
"url\n",
|
| 414 |
+
"category\n",
|
| 415 |
+
"element_id\n",
|
| 416 |
+
"languages\n",
|
| 417 |
+
"filetype\n",
|
| 418 |
+
"parent_id\n",
|
| 419 |
+
"url\n",
|
| 420 |
+
"category\n",
|
| 421 |
+
"element_id\n",
|
| 422 |
+
"category_depth\n",
|
| 423 |
+
"languages\n",
|
| 424 |
+
"filetype\n",
|
| 425 |
+
"url\n",
|
| 426 |
+
"category\n",
|
| 427 |
+
"element_id\n",
|
| 428 |
+
"languages\n",
|
| 429 |
+
"filetype\n",
|
| 430 |
+
"parent_id\n",
|
| 431 |
+
"url\n",
|
| 432 |
+
"category\n",
|
| 433 |
+
"element_id\n",
|
| 434 |
+
"languages\n",
|
| 435 |
+
"filetype\n",
|
| 436 |
+
"parent_id\n",
|
| 437 |
+
"url\n",
|
| 438 |
+
"category\n",
|
| 439 |
+
"element_id\n",
|
| 440 |
+
"languages\n",
|
| 441 |
+
"filetype\n",
|
| 442 |
+
"parent_id\n",
|
| 443 |
+
"url\n",
|
| 444 |
+
"category\n",
|
| 445 |
+
"element_id\n",
|
| 446 |
+
"category_depth\n",
|
| 447 |
+
"languages\n",
|
| 448 |
+
"filetype\n",
|
| 449 |
+
"parent_id\n",
|
| 450 |
+
"url\n",
|
| 451 |
+
"category\n",
|
| 452 |
+
"element_id\n",
|
| 453 |
+
"category_depth\n",
|
| 454 |
+
"languages\n",
|
| 455 |
+
"filetype\n",
|
| 456 |
+
"parent_id\n",
|
| 457 |
+
"url\n",
|
| 458 |
+
"category\n",
|
| 459 |
+
"element_id\n",
|
| 460 |
+
"category_depth\n",
|
| 461 |
+
"languages\n",
|
| 462 |
+
"filetype\n",
|
| 463 |
+
"url\n",
|
| 464 |
+
"category\n",
|
| 465 |
+
"element_id\n",
|
| 466 |
+
"category_depth\n",
|
| 467 |
+
"languages\n",
|
| 468 |
+
"filetype\n",
|
| 469 |
+
"parent_id\n",
|
| 470 |
+
"url\n",
|
| 471 |
+
"category\n",
|
| 472 |
+
"element_id\n",
|
| 473 |
+
"category_depth\n",
|
| 474 |
+
"languages\n",
|
| 475 |
+
"filetype\n",
|
| 476 |
+
"url\n",
|
| 477 |
+
"category\n",
|
| 478 |
+
"element_id\n",
|
| 479 |
+
"category_depth\n",
|
| 480 |
+
"languages\n",
|
| 481 |
+
"filetype\n",
|
| 482 |
+
"parent_id\n",
|
| 483 |
+
"url\n",
|
| 484 |
+
"category\n",
|
| 485 |
+
"element_id\n",
|
| 486 |
+
"category_depth\n",
|
| 487 |
+
"languages\n",
|
| 488 |
+
"filetype\n",
|
| 489 |
+
"url\n",
|
| 490 |
+
"category\n",
|
| 491 |
+
"element_id\n",
|
| 492 |
+
"category_depth\n",
|
| 493 |
+
"languages\n",
|
| 494 |
+
"filetype\n",
|
| 495 |
+
"parent_id\n",
|
| 496 |
+
"url\n",
|
| 497 |
+
"category\n",
|
| 498 |
+
"element_id\n",
|
| 499 |
+
"category_depth\n",
|
| 500 |
+
"link_texts\n",
|
| 501 |
+
"link_urls\n",
|
| 502 |
+
"languages\n",
|
| 503 |
+
"filetype\n",
|
| 504 |
+
"parent_id\n",
|
| 505 |
+
"url\n",
|
| 506 |
+
"category\n",
|
| 507 |
+
"element_id\n",
|
| 508 |
+
"category_depth\n",
|
| 509 |
+
"link_texts\n",
|
| 510 |
+
"link_urls\n",
|
| 511 |
+
"languages\n",
|
| 512 |
+
"filetype\n",
|
| 513 |
+
"parent_id\n",
|
| 514 |
+
"url\n",
|
| 515 |
+
"category\n",
|
| 516 |
+
"element_id\n",
|
| 517 |
+
"category_depth\n",
|
| 518 |
+
"languages\n",
|
| 519 |
+
"filetype\n",
|
| 520 |
+
"parent_id\n",
|
| 521 |
+
"url\n",
|
| 522 |
+
"category\n",
|
| 523 |
+
"element_id\n",
|
| 524 |
+
"category_depth\n",
|
| 525 |
+
"languages\n",
|
| 526 |
+
"filetype\n",
|
| 527 |
+
"url\n",
|
| 528 |
+
"category\n",
|
| 529 |
+
"element_id\n",
|
| 530 |
+
"category_depth\n",
|
| 531 |
+
"languages\n",
|
| 532 |
+
"filetype\n",
|
| 533 |
+
"url\n",
|
| 534 |
+
"category\n",
|
| 535 |
+
"element_id\n",
|
| 536 |
+
"languages\n",
|
| 537 |
+
"filetype\n",
|
| 538 |
+
"parent_id\n",
|
| 539 |
+
"url\n",
|
| 540 |
+
"category\n",
|
| 541 |
+
"element_id\n",
|
| 542 |
+
"category_depth\n",
|
| 543 |
+
"languages\n",
|
| 544 |
+
"filetype\n",
|
| 545 |
+
"parent_id\n",
|
| 546 |
+
"url\n",
|
| 547 |
+
"category\n",
|
| 548 |
+
"element_id\n",
|
| 549 |
+
"category_depth\n",
|
| 550 |
+
"languages\n",
|
| 551 |
+
"filetype\n",
|
| 552 |
+
"parent_id\n",
|
| 553 |
+
"url\n",
|
| 554 |
+
"category\n",
|
| 555 |
+
"element_id\n",
|
| 556 |
+
"category_depth\n",
|
| 557 |
+
"languages\n",
|
| 558 |
+
"filetype\n",
|
| 559 |
+
"url\n",
|
| 560 |
+
"category\n",
|
| 561 |
+
"element_id\n",
|
| 562 |
+
"category_depth\n",
|
| 563 |
+
"link_texts\n",
|
| 564 |
+
"link_urls\n",
|
| 565 |
+
"languages\n",
|
| 566 |
+
"filetype\n",
|
| 567 |
+
"url\n",
|
| 568 |
+
"category\n",
|
| 569 |
+
"element_id\n",
|
| 570 |
+
"category_depth\n",
|
| 571 |
+
"link_texts\n",
|
| 572 |
+
"link_urls\n",
|
| 573 |
+
"languages\n",
|
| 574 |
+
"filetype\n",
|
| 575 |
+
"url\n",
|
| 576 |
+
"category\n",
|
| 577 |
+
"element_id\n",
|
| 578 |
+
"languages\n",
|
| 579 |
+
"filetype\n",
|
| 580 |
+
"parent_id\n",
|
| 581 |
+
"url\n",
|
| 582 |
+
"category\n",
|
| 583 |
+
"element_id\n",
|
| 584 |
+
"category_depth\n",
|
| 585 |
+
"languages\n",
|
| 586 |
+
"filetype\n",
|
| 587 |
+
"url\n",
|
| 588 |
+
"category\n",
|
| 589 |
+
"element_id\n",
|
| 590 |
+
"category_depth\n",
|
| 591 |
+
"languages\n",
|
| 592 |
+
"filetype\n",
|
| 593 |
+
"url\n",
|
| 594 |
+
"category\n",
|
| 595 |
+
"element_id\n",
|
| 596 |
+
"category_depth\n",
|
| 597 |
+
"languages\n",
|
| 598 |
+
"filetype\n",
|
| 599 |
+
"url\n",
|
| 600 |
+
"category\n",
|
| 601 |
+
"element_id\n",
|
| 602 |
+
"languages\n",
|
| 603 |
+
"filetype\n",
|
| 604 |
+
"parent_id\n",
|
| 605 |
+
"url\n",
|
| 606 |
+
"category\n",
|
| 607 |
+
"element_id\n",
|
| 608 |
+
"category_depth\n",
|
| 609 |
+
"languages\n",
|
| 610 |
+
"filetype\n",
|
| 611 |
+
"parent_id\n",
|
| 612 |
+
"url\n",
|
| 613 |
+
"category\n",
|
| 614 |
+
"element_id\n",
|
| 615 |
+
"category_depth\n",
|
| 616 |
+
"languages\n",
|
| 617 |
+
"filetype\n",
|
| 618 |
+
"parent_id\n",
|
| 619 |
+
"url\n",
|
| 620 |
+
"category\n",
|
| 621 |
+
"element_id\n",
|
| 622 |
+
"category_depth\n",
|
| 623 |
+
"languages\n",
|
| 624 |
+
"filetype\n",
|
| 625 |
+
"url\n",
|
| 626 |
+
"category\n",
|
| 627 |
+
"element_id\n",
|
| 628 |
+
"category_depth\n",
|
| 629 |
+
"link_texts\n",
|
| 630 |
+
"link_urls\n",
|
| 631 |
+
"languages\n",
|
| 632 |
+
"filetype\n",
|
| 633 |
+
"url\n",
|
| 634 |
+
"category\n",
|
| 635 |
+
"element_id\n",
|
| 636 |
+
"category_depth\n",
|
| 637 |
+
"link_texts\n",
|
| 638 |
+
"link_urls\n",
|
| 639 |
+
"languages\n",
|
| 640 |
+
"filetype\n",
|
| 641 |
+
"url\n",
|
| 642 |
+
"category\n",
|
| 643 |
+
"element_id\n",
|
| 644 |
+
"languages\n",
|
| 645 |
+
"filetype\n",
|
| 646 |
+
"parent_id\n",
|
| 647 |
+
"url\n",
|
| 648 |
+
"category\n",
|
| 649 |
+
"element_id\n"
|
| 650 |
+
]
|
| 651 |
+
}
|
| 652 |
+
],
|
| 653 |
+
"source": [
|
| 654 |
+
"for data in dataset:\n",
|
| 655 |
+
" # print(f\"{data.page_content} \\n {data.link_texts if data.link_texts else \"\"} -------- {data.link_urls if data.link_urls else \"\"} \")\n",
|
| 656 |
+
" for mt in data.metadata:\n",
|
| 657 |
+
" print(mt)"
|
| 658 |
+
]
|
| 659 |
+
},
|
| 660 |
+
{
|
| 661 |
+
"cell_type": "code",
|
| 662 |
+
"execution_count": 19,
|
| 663 |
+
"metadata": {},
|
| 664 |
+
"outputs": [
|
| 665 |
+
{
|
| 666 |
+
"data": {
|
| 667 |
+
"text/plain": [
|
| 668 |
+
"[Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '3c1cec20a25849e446035e7a95eb6b54'}, page_content='Design. Develop. Deploy'),\n",
|
| 669 |
+
" Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': 'a9be7f1e667e1a81d4fec6bd42699600'}, page_content='Hexa Core Services For Your Business.'),\n",
|
| 670 |
+
" Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'a9be7f1e667e1a81d4fec6bd42699600', 'url': 'https://thehexatech.com', 'category': 'NarrativeText', 'element_id': 'e76565fb197dfa95003d0f3bae11010d'}, page_content='we build success stories for you, get your business a lucky touch'),\n",
|
| 671 |
+
" Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '572a046c5cc30997de111b1b10785237'}, page_content='Viridiv'),\n",
|
| 672 |
+
" Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '65184b4439938fb6b4bc6e5d337866fc'}, page_content='Healscure'),\n",
|
| 673 |
+
" Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '3831b9a9f6395c6e2ea6878650243a46'}, page_content='CodeLens'),\n",
|
| 674 |
+
" Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '942ae29a509225086d1c4f2f57fb3f98'}, page_content='Viridiv'),\n",
|
| 675 |
+
" Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '77ddcd23639bec902ea3619f141696db'}, page_content='Healscure'),\n",
|
| 676 |
+
" Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': 'a8321dedec3fe8899162f1b272783251'}, page_content='CodeLens'),\n",
|
| 677 |
+
" Document(metadata={'category_depth': 1, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'a8321dedec3fe8899162f1b272783251', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': 'dc4cbd6254cb64898decd517056100b3'}, page_content='Be the Achievers of Achievers'),\n",
|
| 678 |
+
" Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'dc4cbd6254cb64898decd517056100b3', 'url': 'https://thehexatech.com', 'category': 'NarrativeText', 'element_id': '464461078fbed9c7787522bcb270409d'}, page_content='Unlock the full potential of your business with The Hexatech, your all-in-one partner for A-Z business solutions. From stunning designs to powerful development, strategic marketing, seamless deployment, and expert SEO—we’re here to turn your vision into reality.'),\n",
|
| 679 |
+
" Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'dc4cbd6254cb64898decd517056100b3', 'url': 'https://thehexatech.com', 'category': 'NarrativeText', 'element_id': 'bb46e06fb19a60f7faa59b09b06a0f0b'}, page_content='Let’s build your success story together.'),\n",
|
| 680 |
+
" Document(metadata={'category_depth': 1, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'a8321dedec3fe8899162f1b272783251', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': 'a813a122ea83dfb365f3f8424aba978c'}, page_content='What we offer'),\n",
|
| 681 |
+
" Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'a813a122ea83dfb365f3f8424aba978c', 'url': 'https://thehexatech.com', 'category': 'NarrativeText', 'element_id': '8cd09a13b4f8df03d844d349b7fe2748'}, page_content='At The Hexatech, we provide end-to-end business solutions tailored to meet your unique goals. From building cutting-edge websites to harnessing the power of AI, we’ve got you covered. Our expert team ensures that your ideas turn into reality with seamless deployment, stunning designs, robust databases, and custom software solutions. Let us take your business to the next level with innovative technology and creative strategies.'),\n",
|
| 682 |
+
" Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'a813a122ea83dfb365f3f8424aba978c', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': 'b25451e0a7dc77c405a8156e32f9200a'}, page_content='Web Development'),\n",
|
| 683 |
+
" Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'b25451e0a7dc77c405a8156e32f9200a', 'url': 'https://thehexatech.com', 'category': 'NarrativeText', 'element_id': '13eec4f02e0fd3d7824e7cc8c26f7a0f'}, page_content=\"We create high-performance, responsive websites that are not only visually stunning but also optimized for functionality. Whether it's a corporate site or an e-commerce platform, we bring your online presence to life.\"),\n",
|
| 684 |
+
" Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'a813a122ea83dfb365f3f8424aba978c', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': 'ef0b6aa953694b1cb1caf329c0263b5c'}, page_content='AI & Machine Learning'),\n",
|
| 685 |
+
" Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'ef0b6aa953694b1cb1caf329c0263b5c', 'url': 'https://thehexatech.com', 'category': 'NarrativeText', 'element_id': 'c93d8d94014b39c79538e77764d06774'}, page_content='Unlock the future with AI and machine learning solutions that help you automate processes, analyze data, and make smarter decisions. From predictive analytics to personalized customer experiences, we’ll help you stay ahead of the curve.'),\n",
|
| 686 |
+
" Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'a813a122ea83dfb365f3f8424aba978c', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '0991c56dd134d345ff8d6e16ffd7f768'}, page_content='Deployment & Maintenance'),\n",
|
| 687 |
+
" Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '0991c56dd134d345ff8d6e16ffd7f768', 'url': 'https://thehexatech.com', 'category': 'NarrativeText', 'element_id': 'df7477130921e1d1a646e57784e30878'}, page_content='Leave the technicalities to us! We offer seamless deployment services, ensuring your web and software solutions are launched efficiently with ongoing maintenance to keep everything running smoothly.'),\n",
|
| 688 |
+
" Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'a813a122ea83dfb365f3f8424aba978c', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '9f8b94bb45907e6adca40628ce5d6852'}, page_content='Design Services'),\n",
|
| 689 |
+
" Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '9f8b94bb45907e6adca40628ce5d6852', 'url': 'https://thehexatech.com', 'category': 'NarrativeText', 'element_id': '7881ffb9d25e9f4121467278c8a06dc6'}, page_content='First impressions matter! Our design team crafts intuitive, eye-catching designs that resonate with your brand’s identity. From branding to UI/UX design, we make sure your product looks as good as it performs.'),\n",
|
| 690 |
+
" Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'a813a122ea83dfb365f3f8424aba978c', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '478986103f66efcb7df645b940f29e7f'}, page_content='Database Management'),\n",
|
| 691 |
+
" Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '478986103f66efcb7df645b940f29e7f', 'url': 'https://thehexatech.com', 'category': 'NarrativeText', 'element_id': '9bd9a46865c444f28f4b5a915c3644db'}, page_content='Your data is your most valuable asset. We offer comprehensive database solutions, from design and setup to optimization and management, ensuring secure, scalable, and efficient data handling.'),\n",
|
| 692 |
+
" Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'a813a122ea83dfb365f3f8424aba978c', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': 'ef57a5666f7e4af366fb092a663f9608'}, page_content='Custom Software'),\n",
|
| 693 |
+
" Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'ef57a5666f7e4af366fb092a663f9608', 'url': 'https://thehexatech.com', 'category': 'NarrativeText', 'element_id': 'bce6bffd80584db9a2b56f80abd81e3a'}, page_content='From concept to execution, we build tailored software solutions that address your unique business needs. Whether it’s enterprise software or mobile apps, we deliver solutions that drive efficiency and growth.'),\n",
|
| 694 |
+
" Document(metadata={'category_depth': 1, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'a8321dedec3fe8899162f1b272783251', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '938668ef3561d6b1360299c2d6d5a807'}, page_content='About Us'),\n",
|
| 695 |
+
" Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '938668ef3561d6b1360299c2d6d5a807', 'url': 'https://thehexatech.com', 'category': 'NarrativeText', 'element_id': '70d631dd6329e150544527dc9c6c9c35'}, page_content='At The Hexatech, we’re more than just a service provider—we’re your strategic partner in building and scaling your business. With a passion for innovation and a commitment to excellence, we deliver A-Z business solutions that empower companies to thrive in the digital age. Founded with the mission to simplify complex processes and accelerate growth, we specialize in everything from web development and AI to design, deployment, and custom software. Whether you’re a startup launching an MVP or an established business looking to redefine your product, we tailor our solutions to meet your unique needs. Our team of expert developers, designers, and strategists work closely with you to bring your vision to life. At The Hexatech, we believe in a hands-on approach, diving deep into customer development and market analysis to ensure that every solution we deliver is not only functional but also impactful. We’re here to help you Design, Develop, Dominate—because your success is our success.'),\n",
|
| 696 |
+
" Document(metadata={'category_depth': 1, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'a8321dedec3fe8899162f1b272783251', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': 'bcf2561eb44dc7c53d51afd41165301a'}, page_content='Testimonials'),\n",
|
| 697 |
+
" Document(metadata={'category_depth': 3, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'bcf2561eb44dc7c53d51afd41165301a', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '86f5f12762496f3571c7892e3ba135a9'}, page_content='\"I can\\'t say enough about the quality of work I have received. I highly recommend The Hexatech for anyone looking for an exceptional web development company.\"'),\n",
|
| 698 |
+
" Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'bcf2561eb44dc7c53d51afd41165301a', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': 'daccc2be3cbbbae5fa1d1cea8b44bd53'}, page_content='Raj Shaw'),\n",
|
| 699 |
+
" Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '022e783cb1b79b0a4f0f93bb04e41f2b'}, page_content='moodmeter'),\n",
|
| 700 |
+
" Document(metadata={'category_depth': 3, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '022e783cb1b79b0a4f0f93bb04e41f2b', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '94e02c7aea78a3648a772f0cf4f2320b'}, page_content='\"The Hexatech has been a game changer for my business. Their team is incredibly skilled and responsive, and their work has been exceptional. I highly recommend The Hexatech for any web development & AI needs.\"'),\n",
|
| 701 |
+
" Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '022e783cb1b79b0a4f0f93bb04e41f2b', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '187b2131a84bd4710fafc95df4827972'}, page_content='Rishi Pipaliya'),\n",
|
| 702 |
+
" Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '643eab2a84465c21396eba4da3754638'}, page_content='Rose & Fern'),\n",
|
| 703 |
+
" Document(metadata={'category_depth': 3, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '643eab2a84465c21396eba4da3754638', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '94a56bfb26ec626221542aca30b92e31'}, page_content='\"The Hexatech team has been incredibly professional and responsive. They have been able to meet our needs and exceed our expectations.\"'),\n",
|
| 704 |
+
" Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '643eab2a84465c21396eba4da3754638', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '4c4c51cb803d93f20260a5d37f405ebb'}, page_content='Jeet Ghosh'),\n",
|
| 705 |
+
" Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '019af31e2271b35e3f6304bef67a2325'}, page_content='LumaticAI'),\n",
|
| 706 |
+
" Document(metadata={'category_depth': 1, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '019af31e2271b35e3f6304bef67a2325', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '5a8cbcc5d057a3c31215b56f56eebe80'}, page_content='How We Work'),\n",
|
| 707 |
+
" Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '5a8cbcc5d057a3c31215b56f56eebe80', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': 'b6b7d000c4c0b142c45e691491e2be66'}, page_content='01'),\n",
|
| 708 |
+
" Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '2df4608e232cfde989b8d2c73a921d5e'}, page_content='Click on quote'),\n",
|
| 709 |
+
" Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '2df4608e232cfde989b8d2c73a921d5e', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': 'a20a3cdb6b78e4c93999a1d977b832a2'}, page_content='02'),\n",
|
| 710 |
+
" Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '033563c87be9c8b842fef65fa51eb87c'}, page_content='Submit the quote form'),\n",
|
| 711 |
+
" Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '033563c87be9c8b842fef65fa51eb87c', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': 'bcb091bdbae90710a3cfc43fecc32854'}, page_content='03'),\n",
|
| 712 |
+
" Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'bcb091bdbae90710a3cfc43fecc32854', 'url': 'https://thehexatech.com', 'category': 'NarrativeText', 'element_id': 'f570c1fa7f254209fc40301203fc352d'}, page_content='We get in touch through meet'),\n",
|
| 713 |
+
" Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '033563c87be9c8b842fef65fa51eb87c', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '76b8fe7da31a1c90ca3f7822d11ef24c'}, page_content='04'),\n",
|
| 714 |
+
" Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '76b8fe7da31a1c90ca3f7822d11ef24c', 'url': 'https://thehexatech.com', 'category': 'NarrativeText', 'element_id': 'fc6d45688159d48648384deb3602224e'}, page_content='Deal is finalized'),\n",
|
| 715 |
+
" Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '033563c87be9c8b842fef65fa51eb87c', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': 'cb85bb1a427444c57f23259910a63271'}, page_content='05'),\n",
|
| 716 |
+
" Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'cb85bb1a427444c57f23259910a63271', 'url': 'https://thehexatech.com', 'category': 'NarrativeText', 'element_id': 'b3dfc81aee234a242f679415f694e71e'}, page_content='Project is delivered after payment'),\n",
|
| 717 |
+
" Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': 'de7e4a85336d0f00a0c0af354115d06d'}, page_content='Click Here'),\n",
|
| 718 |
+
" Document(metadata={'category_depth': 1, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'de7e4a85336d0f00a0c0af354115d06d', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': 'f2542a84aa4bdfba53ecb1b944cd1087'}, page_content=\"Have a project in mind? Let's Talk\"),\n",
|
| 719 |
+
" Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'f2542a84aa4bdfba53ecb1b944cd1087', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': 'f8caf8ca5810b78c46d4bc3c2b0fd8f9'}, page_content='Our Location'),\n",
|
| 720 |
+
" Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '6f24b27afe3e5f383363f470d3738f24'}, page_content='Om Shayona Arcade, Gota, Ahmedabad, Gujarat, India, 382481'),\n",
|
| 721 |
+
" Document(metadata={'category_depth': 0, 'link_texts': ['Home', 'About', 'Services', 'Testimonials', 'Quote'], 'link_urls': ['#page1', '/about/index.html', '#page1Box2', '#testimonials', '/quote/index.html'], 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': 'c88f8facd545bb6e8a062d13982f2ebe'}, page_content='Home About Services Testimonials Quote'),\n",
|
| 722 |
+
" Document(metadata={'category_depth': 0, 'link_texts': ['LinkedIn', 'Instagram', 'Email', 'Phone'], 'link_urls': ['https://www.linkedin.com/company/the-hexatech/about/', 'https://www.instagram.com/thehexatech/', '/cdn-cgi/l/email-protection#5024383538352831243533387e3f3636393339313c10373d31393c7e333f3d', 'tel:+91 9749525157'], 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com', 'category': 'Title', 'element_id': '4334e83dc625417d783e2a7a75ee8540'}, page_content='LinkedIn Instagram Email Phone'),\n",
|
| 723 |
+
" Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '4334e83dc625417d783e2a7a75ee8540', 'url': 'https://thehexatech.com', 'category': 'NarrativeText', 'element_id': '8cbd7e7952b30d4ed3472272e6e0e5a5'}, page_content='© 2024, The Hexatech. All Rights Reserved.'),\n",
|
| 724 |
+
" Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': '9d0357eb48545a3eb0190751f5ac58f6'}, page_content='Our Company'),\n",
|
| 725 |
+
" Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '9d0357eb48545a3eb0190751f5ac58f6', 'url': 'https://thehexatech.com/about/index.html', 'category': 'NarrativeText', 'element_id': '2fb42adc7b5e984d33d83cc5f9533737'}, page_content='At The Hexatech, we believe that every great business starts with a bold idea. We specialize in providing end-to-end business solutions that help entrepreneurs and businesses bring their visions to life. From designing unique digital experiences, to developing state-of-the-art software, to implementing cutting-edge AI and machine learning systems — we do it all.'),\n",
|
| 726 |
+
" Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '9d0357eb48545a3eb0190751f5ac58f6', 'url': 'https://thehexatech.com/about/index.html', 'category': 'NarrativeText', 'element_id': '6eea97466a12a2eb4691a7bf4dd7dbd1'}, page_content='Our approach is holistic. We not only build and deploy, but also support businesses throughout their entire lifecycle. Whether you are launching a new product, refining an existing one, or optimizing for scalability, we ensure that every step is handled with care and precision. Our dedicated team of experts will work with you to innovate, develop, and grow your brand to achieve sustained market success.'),\n",
|
| 727 |
+
" Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '9d0357eb48545a3eb0190751f5ac58f6', 'url': 'https://thehexatech.com/about/index.html', 'category': 'NarrativeText', 'element_id': '0a688be6002688e1bd236b83c6685328'}, page_content='The Hexatech is your trusted partner for turning digital challenges into opportunities, combining creativity, technology, and business acumen to help you stay ahead of the competition. With our tailored solutions, we aim to transform your business and ensure it thrives in today’s fast-evolving digital world.'),\n",
|
| 728 |
+
" Document(metadata={'category_depth': 1, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '9d0357eb48545a3eb0190751f5ac58f6', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': '30e38d2f2cc891f1ce1d512ad55a5d4e'}, page_content='Meet Our Founders'),\n",
|
| 729 |
+
" Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '30e38d2f2cc891f1ce1d512ad55a5d4e', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': 'd0ef3163f0a426ee1ddf8082a05eaae1'}, page_content='Rohan Shaw'),\n",
|
| 730 |
+
" Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': '864b9fd57ef3c68cf9c1beaa71717167'}, page_content='Co-Founder & CEO'),\n",
|
| 731 |
+
" Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '864b9fd57ef3c68cf9c1beaa71717167', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': 'b04d02f04d097bddd50214ed1411f9b4'}, page_content='Sujal Merani'),\n",
|
| 732 |
+
" Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': '38575392064a16ad2e677971c65909af'}, page_content='Co-Founder & CSO'),\n",
|
| 733 |
+
" Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '38575392064a16ad2e677971c65909af', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': '76302995c4c37fa70c313f943af033e1'}, page_content='Fresil Patel'),\n",
|
| 734 |
+
" Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': '988847bc72eb2d24bbf243b589f0e1f5'}, page_content='Co-Founder & CMO'),\n",
|
| 735 |
+
" Document(metadata={'category_depth': 1, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '988847bc72eb2d24bbf243b589f0e1f5', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': '1c6a0d5446f88de7f3a64268d3482537'}, page_content='Contact'),\n",
|
| 736 |
+
" Document(metadata={'category_depth': 2, 'link_texts': ['[email protected]'], 'link_urls': ['/cdn-cgi/l/email-protection#4034282528253821342523286e2f2626292329212c00272d21292c6e232f2d'], 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '1c6a0d5446f88de7f3a64268d3482537', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': 'e62503f6dd746e461dd27b76adbc1e69'}, page_content='Email : [email protected]'),\n",
|
| 737 |
+
" Document(metadata={'category_depth': 2, 'link_texts': ['+91 9749525157', '+91 9727226136', '+91 8320372440'], 'link_urls': ['tel:+91 9749525157', 'tel:+91 9727226136', 'tel:+91 8320372440'], 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '1c6a0d5446f88de7f3a64268d3482537', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': '9322425df598a7db36800b11ff6e4341'}, page_content='Phone : +91 9749525157 | +91 9727226136 | +91 8320372440'),\n",
|
| 738 |
+
" Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '1c6a0d5446f88de7f3a64268d3482537', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': '316a26b53e2457c38a24dfd834bb27a3'}, page_content='Head Office :'),\n",
|
| 739 |
+
" Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': '9c234b8193a3c6e2094c1c9b25cab652'}, page_content='211, 2nd floor, Om Shayona Arcade, Gota, Ahmedabad, Gujarat, India'),\n",
|
| 740 |
+
" Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': '67118c836ec07ff0aa780851e2bff109'}, page_content='Click Here'),\n",
|
| 741 |
+
" Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '67118c836ec07ff0aa780851e2bff109', 'url': 'https://thehexatech.com/about/index.html', 'category': 'UncategorizedText', 'element_id': '6ce65a8bee8156d51839d5bb165daccd'}, page_content='&/#10132;'),\n",
|
| 742 |
+
" Document(metadata={'category_depth': 1, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '67118c836ec07ff0aa780851e2bff109', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': 'a7c289d38a16d0061c9cf53ee60d3fe7'}, page_content=\"Have a project in mind? Let's Talk\"),\n",
|
| 743 |
+
" Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'a7c289d38a16d0061c9cf53ee60d3fe7', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': 'fff5820e7c52059c6dd4b80fac377378'}, page_content='Our Location'),\n",
|
| 744 |
+
" Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': 'd8cac8da8b7789dec9d2150a34ad5cc3'}, page_content='Om Shayona Arcade, Gota, Ahmedabad, Gujarat, India, 382481'),\n",
|
| 745 |
+
" Document(metadata={'category_depth': 0, 'link_texts': ['Home', 'About', 'Services', 'Testimonials', 'Quote'], 'link_urls': ['/#page1', '/about/index.html', '/#page1Box2', '/#testimonials', '/quote/index.html'], 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': '64b0a55bd9eba5ca20f6c33fed27166f'}, page_content='Home About Services Testimonials Quote'),\n",
|
| 746 |
+
" Document(metadata={'category_depth': 0, 'link_texts': ['LinkedIn', 'Instagram', 'Email', 'Phone'], 'link_urls': ['https://www.linkedin.com/company/the-hexatech/about/', 'https://www.instagram.com/thehexatech/', '/cdn-cgi/l/email-protection#9de9f5f8f5f8e5fce9f8fef5b3f2fbfbf4fef4fcf1ddfaf0fcf4f1b3fef2f0', 'tel:+91 9749525157'], 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com/about/index.html', 'category': 'Title', 'element_id': 'a9207d75c7ac89e9d67d65cffa2f9807'}, page_content='LinkedIn Instagram Email Phone'),\n",
|
| 747 |
+
" Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'a9207d75c7ac89e9d67d65cffa2f9807', 'url': 'https://thehexatech.com/about/index.html', 'category': 'NarrativeText', 'element_id': '9c6e77a08330da21afcbe6a9ce68f244'}, page_content='© 2024, The Hexatech. All Rights Reserved.'),\n",
|
| 748 |
+
" Document(metadata={'category_depth': 1, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com/quote/index.html', 'category': 'Title', 'element_id': '3c242d6309c71ef20ac0a761c047c5ea'}, page_content='Get A Quote'),\n",
|
| 749 |
+
" Document(metadata={'category_depth': 1, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com/quote/index.html', 'category': 'Title', 'element_id': '3c8adb6c12d201c5797ac00b7334ebb4'}, page_content='Thank you! We will get back to you soon.'),\n",
|
| 750 |
+
" Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com/quote/index.html', 'category': 'Title', 'element_id': 'd0bb7211fcefbbc5691caeb93cc8d56c'}, page_content='Click Here'),\n",
|
| 751 |
+
" Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'd0bb7211fcefbbc5691caeb93cc8d56c', 'url': 'https://thehexatech.com/quote/index.html', 'category': 'UncategorizedText', 'element_id': 'e7eaf82161f1ec7d7ac4937791c0e7d7'}, page_content='&/#10132;'),\n",
|
| 752 |
+
" Document(metadata={'category_depth': 1, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'd0bb7211fcefbbc5691caeb93cc8d56c', 'url': 'https://thehexatech.com/quote/index.html', 'category': 'Title', 'element_id': '0794700a012e87ec297b94d01edee052'}, page_content=\"Have a project in mind? Let's Talk\"),\n",
|
| 753 |
+
" Document(metadata={'category_depth': 2, 'languages': ['eng'], 'filetype': 'text/html', 'parent_id': '0794700a012e87ec297b94d01edee052', 'url': 'https://thehexatech.com/quote/index.html', 'category': 'Title', 'element_id': 'd23be126165c0fff800694f8f8bac6ff'}, page_content='Our Location'),\n",
|
| 754 |
+
" Document(metadata={'category_depth': 0, 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com/quote/index.html', 'category': 'Title', 'element_id': 'ba20a3788d99b79398694e2be15be0a4'}, page_content='Om Shayona Arcade, Gota, Ahmedabad, Gujarat, India, 382481'),\n",
|
| 755 |
+
" Document(metadata={'category_depth': 0, 'link_texts': ['Home', 'About', 'Services', 'Testimonials', 'Quote'], 'link_urls': ['/#page1', '/about/index.html', '/#page1Box2', '/#testimonials', '/quote/index.html'], 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com/quote/index.html', 'category': 'Title', 'element_id': '16c0f77bbd46a85c591652e06af0595b'}, page_content='Home About Services Testimonials Quote'),\n",
|
| 756 |
+
" Document(metadata={'category_depth': 0, 'link_texts': ['LinkedIn', 'Instagram', 'Email', 'Phone'], 'link_urls': ['https://www.linkedin.com/company/the-hexatech/about/', 'https://www.instagram.com/thehexatech/', '/cdn-cgi/l/email-protection#b0c4d8d5d8d5c8d1c4d5d3d89edfd6d6d9d3d9d1dcf0d7ddd1d9dc9ed3dfdd', 'tel:+91 9749525157'], 'languages': ['eng'], 'filetype': 'text/html', 'url': 'https://thehexatech.com/quote/index.html', 'category': 'Title', 'element_id': 'f8bff147020fc1cb4df51c14fb152bcb'}, page_content='LinkedIn Instagram Email Phone'),\n",
|
| 757 |
+
" Document(metadata={'languages': ['eng'], 'filetype': 'text/html', 'parent_id': 'f8bff147020fc1cb4df51c14fb152bcb', 'url': 'https://thehexatech.com/quote/index.html', 'category': 'NarrativeText', 'element_id': '8827d6da3cae3e9982671539a167ecd2'}, page_content='© 2024, The Hexatech. All Rights Reserved.')]"
|
| 758 |
+
]
|
| 759 |
+
},
|
| 760 |
+
"execution_count": 19,
|
| 761 |
+
"metadata": {},
|
| 762 |
+
"output_type": "execute_result"
|
| 763 |
+
}
|
| 764 |
+
],
|
| 765 |
+
"source": [
|
| 766 |
+
"dataset"
|
| 767 |
+
]
|
| 768 |
+
}
|
| 769 |
+
],
|
| 770 |
+
"metadata": {
|
| 771 |
+
"kernelspec": {
|
| 772 |
+
"display_name": "venv1",
|
| 773 |
+
"language": "python",
|
| 774 |
+
"name": "venv1"
|
| 775 |
+
},
|
| 776 |
+
"language_info": {
|
| 777 |
+
"codemirror_mode": {
|
| 778 |
+
"name": "ipython",
|
| 779 |
+
"version": 3
|
| 780 |
+
},
|
| 781 |
+
"file_extension": ".py",
|
| 782 |
+
"mimetype": "text/x-python",
|
| 783 |
+
"name": "python",
|
| 784 |
+
"nbconvert_exporter": "python",
|
| 785 |
+
"pygments_lexer": "ipython3",
|
| 786 |
+
"version": "3.12.2"
|
| 787 |
+
}
|
| 788 |
+
},
|
| 789 |
+
"nbformat": 4,
|
| 790 |
+
"nbformat_minor": 2
|
| 791 |
+
}
|