Spaces:
Sleeping
Sleeping
Upload 10 files
Browse files- README.md +46 -13
- app.py +51 -0
- chatbot.py +75 -0
- config.py +14 -0
- error_logger.py +15 -0
- graphs.py +34 -0
- langchain_text_splitter.py +17 -0
- summarizer.py +21 -0
- text_extraction.py +32 -0
- vector_store.py +34 -0
README.md
CHANGED
|
@@ -1,13 +1,46 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Text Retrieval and Summarizer ChatBot Framework
|
| 2 |
+
|
| 3 |
+
### RAG (Retrieval-Augmented Generation) System
|
| 4 |
+
|
| 5 |
+
**Project Summary**:
|
| 6 |
+
|
| 7 |
+
This project is a text retrieval and summarization system that allows users to input a question and receive a concise summary based on relevant content.
|
| 8 |
+
|
| 9 |
+
It works by first converting the user’s input into numerical embeddings using a sentence transformer model. These embeddings are then compared against a pre-built vector index (FAISS) to identify the most relevant text chunks from your dataset. The retrieved content is combined and passed to a transformer-based summarization model BART, which generates a concise summary as the final output.
|
| 10 |
+
|
| 11 |
+
The entire pipeline is integrated into an interactive user interface built using Gradio, allowing users to easily input queries and view summarized results in real time.
|
| 12 |
+
|
| 13 |
+
Steps:
|
| 14 |
+
1. Retrieves relevant text from a User's Document (FAISS)
|
| 15 |
+
2. Converts Corpus to Sentences (Sentence Transformer)
|
| 16 |
+
3. Generates a Summarized output (HuggingFace Text Summarizer)
|
| 17 |
+
|
| 18 |
+
**Use of SBERT**:
|
| 19 |
+
|
| 20 |
+
Sentence Transformers (SBERT) uses pretrained embedding models: we pass in the text chunks from the previous step and it converts them into vectors. (source: Hugging Face)
|
| 21 |
+
Embeddings are dense, lower-dimensional, numerical vector representations of data such as text, images, or audio that capture semantic meaning and relationships. (source: Google)
|
| 22 |
+
|
| 23 |
+
Steps:
|
| 24 |
+
1. Load an embedding model
|
| 25 |
+
2. Feed text chunks into the model
|
| 26 |
+
3. Convert each chunk into a vector of numbers
|
| 27 |
+
|
| 28 |
+
Transformer Model (all-MiniLM-L6-v2):
|
| 29 |
+
This is a sentence-transformers model: It maps sentences & paragraphs to a 384 dimensional dense vector space and can be used for tasks like clustering or semantic search.(huggingface)
|
| 30 |
+
|
| 31 |
+
**Use of FAISS**:
|
| 32 |
+
|
| 33 |
+
FAISS, which stands for Facebook AI Similarity Search, acts as a super-fast “vector search engine”.
|
| 34 |
+
It is an open-source library developed by Meta's Fundamental AI Research group (formerly Facebook AI Research) designed for the efficient similarity search and clustering of dense vectors. (google)
|
| 35 |
+
|
| 36 |
+
Takes chunks of text from the document
|
| 37 |
+
As each chunk is previously converted to a 384-dimensional embedding by MiniLM
|
| 38 |
+
All of these embeddings are stored in a FAISS index
|
| 39 |
+
so when a user asks a question, the question is converted to a vector and FAISS finds the nearest embeddings (most similar chunks of text from the document)
|
| 40 |
+
Then we pass those chunks to the summarization model (BART) to generate the answer
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
**Final Pipeline**:
|
| 44 |
+
Take PDF -> Get chunks -> Make embeddings -> Ask Question -> Retrieve Answer -> Summarize Result and Display Metrics
|
| 45 |
+
|
| 46 |
+
*--by Murk Asad*
|
app.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#setting up interface
|
| 2 |
+
|
| 3 |
+
import gradio as gr
|
| 4 |
+
from error_logger import setup_logger
|
| 5 |
+
from text_extraction import load_pdf_text
|
| 6 |
+
from langchain_text_splitter import clean_text, create_chunks
|
| 7 |
+
from vector_store import build_vectorstore
|
| 8 |
+
from summarizer import load_summarizer
|
| 9 |
+
from chatbot import chat_answer
|
| 10 |
+
from config import PDF_PATH
|
| 11 |
+
|
| 12 |
+
# Configure logging first so that failures in the startup steps below are recorded.
setup_logger()

# Build the retrieval corpus once, at import time:
# PDF file -> raw text -> cleaned text -> list of text chunks.
corpus = load_pdf_text(PDF_PATH)
cleaned = clean_text(corpus)
chunks = create_chunks(cleaned)

# Embed the chunks into a vector index and load the summarizer; these module-level
# objects are reused by respond() for every chat turn.
embedding_model, index = build_vectorstore(chunks)
summarizer = load_summarizer()
|
| 20 |
+
|
| 21 |
+
def respond(message, history):
    """Handle one chat turn for the Gradio UI.

    Passes the question to ``chat_answer`` together with the module-level
    embedding model, index, chunks and summarizer, then appends the question
    and the answer to the conversation.

    Args:
        message: Text submitted from the question textbox.
        history: Current chatbot value, a list of ``{"role", "content"}``
            dicts. ``None`` is treated as an empty conversation.

    Returns:
        A ``(history, metrics, g1, g2, g3)`` tuple, in the same order as the
        outputs wired to ``msg.submit``.
    """
    # Work on a copy: the incoming list belongs to the caller, and a missing
    # history (None) must not crash the ``append`` calls below.
    history = list(history or [])

    answer, metrics, g1, g2, g3 = chat_answer(
        message,
        history,
        embedding_model,
        index,
        chunks,
        summarizer,
    )

    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": answer})

    return history, metrics, g1, g2, g3
|
| 35 |
+
|
| 36 |
+
# Assemble the page: chat window, question box, metrics text and three graph panels.
with gr.Blocks() as demo:
    gr.Markdown("## Deep Learning Chat with Metrics & Graphs")

    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Ask a question")

    metrics_box = gr.Textbox(label="Metrics")
    g1 = gr.Image(label="Graph 1")
    g2 = gr.Image(label="Graph 2")
    g3 = gr.Image(label="Graph 3")

    # Submitting the textbox runs respond(); its five return values map onto
    # the five output components in this order.
    submit_inputs = [msg, chatbot]
    submit_outputs = [chatbot, metrics_box, g1, g2, g3]
    msg.submit(respond, submit_inputs, submit_outputs)

    gr.Markdown("RAG Project by Murk Asad")

demo.launch()
|
chatbot.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#getting replies
|
| 2 |
+
|
| 3 |
+
import time
|
| 4 |
+
import logging
|
| 5 |
+
from config import TOP_K, MAX_RETRIEVED_WORDS
|
| 6 |
+
from graphs import create_graphs
|
| 7 |
+
from vector_store import retrieve_chunks
|
| 8 |
+
from summarizer import summarize_text
|
| 9 |
+
|
| 10 |
+
logger = logging.getLogger(__name__)
|
| 11 |
+
|
| 12 |
+
def chat_answer(message, history, embedding_model, index, chunks, summarizer):
    """Answer a question by retrieving document chunks and summarizing them.

    Args:
        message: The user's current question.
        history: Earlier turns as dicts with ``"role"`` and ``"content"`` keys.
            User turns found among the last three entries are prepended to the
            query to give it conversational context. ``None`` is treated as
            an empty history.
        embedding_model: Passed through to ``retrieve_chunks``.
        index: Passed through to ``retrieve_chunks``.
        chunks: The document chunks; also used for the chunk-length graph.
        summarizer: Passed through to ``summarize_text``.

    Returns:
        A ``(summary, metrics, g1, g2, g3)`` tuple, where ``metrics`` is a
        multi-line text report and ``g1``..``g3`` are the values returned by
        ``create_graphs``. If anything fails, the error is logged and
        ``("Error occurred. Please try again.", "", None, None, None)`` is
        returned so the caller always receives five values.
    """
    try:
        # Only the user's own questions from the last three history entries
        # are used as context; assistant replies are ignored.
        recent_user_turns = [
            str(turn["content"])
            for turn in (history or [])[-3:]
            if turn["role"] == "user"
        ]
        # Joining through a list avoids a stray leading space when there is
        # no prior context.
        full_query = " ".join(recent_user_turns + [message])

        # perf_counter() is a monotonic clock meant for measuring durations;
        # time.time() can jump if the system clock is adjusted.
        retrieve_start = time.perf_counter()
        retrieved_chunks = retrieve_chunks(
            full_query,
            embedding_model,
            index,
            chunks,
            TOP_K,
        )
        retrieve_end = time.perf_counter()

        # Cap the text handed to the summarizer at MAX_RETRIEVED_WORDS words.
        answer_words = " ".join(retrieved_chunks).split()[:MAX_RETRIEVED_WORDS]
        answer = " ".join(answer_words)

        summary = summarize_text(answer, summarizer)
        summarize_end = time.perf_counter()

        retrieved_len = len(answer_words)
        summary_len = len(summary.split())

        # Time taken by each stage of the pipeline, in seconds.
        stage_times = {
            "Retrieve": retrieve_end - retrieve_start,
            "Summarize": summarize_end - retrieve_end,
        }

        chunk_lengths = [len(c.split()) for c in chunks]

        g1, g2, g3 = create_graphs(
            chunk_lengths,
            retrieved_len,
            summary_len,
            stage_times,
        )

        # max(..., 1) guards the ratio against division by zero.
        compression_ratio = round(summary_len / max(retrieved_len, 1), 3)
        metrics = (
            "\n"
            f"Retrieved words: {retrieved_len}\n"
            f"Summary words: {summary_len}\n"
            f"Compression ratio: {compression_ratio}\n"
            "\n"
            f"Retrieval time: {round(stage_times['Retrieve'], 3)}s\n"
            f"Summarization time: {round(stage_times['Summarize'], 3)}s\n"
        )

        return summary, metrics, g1, g2, g3

    except Exception:
        # Boundary handler for the UI: log the traceback and return five
        # placeholder values matching the success path.
        logger.exception("Chatbot error")
        return "Error occurred. Please try again.", "", None, None, None
|
config.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#for basic configurations
|
| 2 |
+
# Path of the PDF that is loaded and indexed at startup.
PDF_PATH: str = "data/Deep+Learning+Ian+Goodfellow.pdf"

# Chunk size and overlap handed to the text splitter.
CHUNK_SIZE: int = 500
CHUNK_OVERLAP: int = 50

# Model identifiers for the sentence-embedding model and the summarizer.
EMBEDDING_MODEL: str = "all-MiniLM-L6-v2"
SUMMARIZER_MODEL: str = "facebook/bart-large-cnn"

# Number of chunks retrieved per query, and the word cap applied to the
# retrieved text before it is summarized.
TOP_K: int = 2
MAX_RETRIEVED_WORDS: int = 200

# Passed to the summarizer as min_length / max_length.
MIN_SUMMARY_LEN: int = 20
MAX_SUMMARY_LEN: int = 50
|
error_logger.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#for error handling
|
| 2 |
+
import logging
|
| 3 |
+
import os
|
| 4 |
+
|
| 5 |
+
def setup_logger():
    """Configure root logging to write to ``logs/app.log`` and to the console.

    Creates the ``logs`` directory (relative to the current working
    directory) if it does not exist, then calls ``logging.basicConfig`` with
    level INFO, a shared format, and a file handler plus a stream handler.
    """
    os.makedirs("logs", exist_ok=True)

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
        handlers=[
            # Explicit UTF-8 so that non-ASCII characters in log messages do
            # not depend on the platform's default encoding.
            logging.FileHandler("logs/app.log", encoding="utf-8"),
            logging.StreamHandler(),
        ],
    )
|
graphs.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import matplotlib.pyplot as plt
|
| 2 |
+
import tempfile
|
| 3 |
+
import os
|
| 4 |
+
import uuid #helps generate temporary random ids for storing graphs with unique names randomly and save them
|
| 5 |
+
|
| 6 |
+
def _save_figure(fig, directory, stem, uid):
    """Save *fig* as ``<stem>_<uid>.png`` in *directory*, close it, return the path."""
    path = os.path.join(directory, f"{stem}_{uid}.png")
    try:
        fig.savefig(path)
    finally:
        # Always release the figure, even if saving fails, so figures do not
        # accumulate in memory across requests.
        plt.close(fig)
    return path


def create_graphs(chunk_lengths, retrieved_len, summary_len, stage_times):
    """Render the three metric charts to PNG files and return their paths.

    Args:
        chunk_lengths: Sequence of numbers plotted as a 20-bin histogram.
        retrieved_len: Height of the "Retrieved" bar.
        summary_len: Height of the "Summary" bar.
        stage_times: Mapping of stage name to duration, plotted as a bar chart.

    Returns:
        A ``(g1, g2, g3)`` tuple of file paths inside the system temp
        directory. All three share one random UUID so that files from
        different calls do not overwrite each other.
    """
    temp_dir = tempfile.gettempdir()
    uid = str(uuid.uuid4())

    # Each chart is drawn on its own Figure/Axes pair instead of pyplot's
    # implicit "current figure", so drawing never depends on global state.

    # Graph 1: time spent in each pipeline stage.
    fig, ax = plt.subplots()
    ax.bar(list(stage_times.keys()), list(stage_times.values()))
    ax.set_title("Pipeline Stage Execution Time")
    g1 = _save_figure(fig, temp_dir, "g1", uid)

    # Graph 2: distribution of chunk lengths.
    fig, ax = plt.subplots()
    ax.hist(chunk_lengths, bins=20)
    ax.set_title("Chunk Length Distribution")
    g2 = _save_figure(fig, temp_dir, "g2", uid)

    # Graph 3: retrieved text length versus summary length.
    fig, ax = plt.subplots()
    ax.bar(["Retrieved", "Summary"], [retrieved_len, summary_len])
    ax.set_title("Retrieved vs Summary Length")
    g3 = _save_figure(fig, temp_dir, "g3", uid)

    return g1, g2, g3
|
langchain_text_splitter.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#splitting text to chunks from the extracted pdf file, and overlapping chunks to keep some previous context
|
| 2 |
+
|
| 3 |
+
import re
|
| 4 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 5 |
+
from config import CHUNK_SIZE, CHUNK_OVERLAP #getting values from configuration file
|
| 6 |
+
|
| 7 |
+
def clean_text(corpus: str) -> str:
    """Normalize extracted text: single spaces, split fused words, lowercase.

    Every run of whitespace becomes one space, a space is inserted wherever a
    lowercase ASCII letter is directly followed by an uppercase one, and the
    result is lowercased.
    """
    substitutions = (
        (r'\s+', ' '),                  # collapse whitespace runs
        (r'([a-z])([A-Z])', r'\1 \2'),  # "fooBar" -> "foo Bar"
    )
    normalized = corpus
    for pattern, replacement in substitutions:
        normalized = re.sub(pattern, replacement, normalized)
    return normalized.lower()
|
| 11 |
+
|
| 12 |
+
def create_chunks(text: str):
    """Split *text* into overlapping chunks using the configured size and overlap.

    Returns whatever ``RecursiveCharacterTextSplitter.split_text`` produces
    for *text* with ``CHUNK_SIZE`` and ``CHUNK_OVERLAP`` from the config.
    """
    splitter_options = {
        "chunk_size": CHUNK_SIZE,
        "chunk_overlap": CHUNK_OVERLAP,
    }
    text_splitter = RecursiveCharacterTextSplitter(**splitter_options)
    return text_splitter.split_text(text)
|
summarizer.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#summarizing the closest 2 chunks extracted from vector store
|
| 2 |
+
|
| 3 |
+
from transformers import pipeline
|
| 4 |
+
from config import SUMMARIZER_MODEL, MIN_SUMMARY_LEN, MAX_SUMMARY_LEN
|
| 5 |
+
|
| 6 |
+
def load_summarizer():
    """Create and return the summarization pipeline for ``SUMMARIZER_MODEL``."""
    summarization_pipeline = pipeline("summarization", model=SUMMARIZER_MODEL)
    return summarization_pipeline
|
| 8 |
+
|
| 9 |
+
def summarize_text(text, summarizer):
    """Summarize *text* with the given summarization pipeline.

    Args:
        text: The text to summarize; must contain non-whitespace characters.
        summarizer: A callable pipeline as returned by ``load_summarizer``.

    Returns:
        The ``"summary_text"`` string of the first result.

    Raises:
        ValueError: If *text* is empty, ``None`` or only whitespace.
    """
    if not text or not text.strip():
        raise ValueError("Input text for summarization is empty.")

    output = summarizer(
        text,
        repetition_penalty=5.0,
        length_penalty=0.3,
        min_length=MIN_SUMMARY_LEN,
        max_length=MAX_SUMMARY_LEN,
        # Let the tokenizer cut over-long inputs down to the model's maximum
        # input length instead of failing inside the model.
        truncation=True,
    )

    # The pipeline returns a list with one dict per input; only the summary
    # string of the single input is needed.
    return output[0]["summary_text"]
|
text_extraction.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#extracting from the pdf book
|
| 2 |
+
import pdfplumber
|
| 3 |
+
import logging
|
| 4 |
+
|
| 5 |
+
logger = logging.getLogger(__name__)
|
| 6 |
+
|
| 7 |
+
def load_pdf_text(pdf_path: str) -> str:
    """Extract the text of every page of a PDF into one string.

    Pages are read in order with ``pdfplumber``; the text of each page is
    followed by a single space. Pages that yield no text are skipped and a
    warning is logged with the 1-based page number.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The concatenated page texts.

    Raises:
        FileNotFoundError: Re-raised after logging when the file is missing.
        ValueError: If no page produced any non-whitespace text.
        Exception: Any other failure is logged with its traceback and re-raised.
    """
    try:
        page_texts = []

        with pdfplumber.open(pdf_path) as pdf:
            # start=1 only makes the logged page numbers 1-based; no page is skipped.
            for page_num, page in enumerate(pdf.pages, start=1):
                text = page.extract_text()

                if text:
                    page_texts.append(text)
                else:
                    logger.warning("No text found on page %d", page_num)

        # Build the corpus in one pass rather than with repeated ``+=``,
        # which can degrade to quadratic time on a long book.
        corpus = "".join(f"{text} " for text in page_texts)

        if not corpus.strip():
            raise ValueError("Empty PDF content")

        logger.info("PDF loaded successfully")
        return corpus

    except FileNotFoundError:
        logger.error("PDF file not found")
        raise

    except Exception as e:
        logger.exception("Error loading PDF: %s", e)
        raise
|
vector_store.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#transforming sentence chunks from langchain into vectors using faiss
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
import faiss
|
| 5 |
+
from sentence_transformers import SentenceTransformer
|
| 6 |
+
from config import EMBEDDING_MODEL
|
| 7 |
+
|
| 8 |
+
def load_embedding_model():
    """Instantiate the sentence-embedding model named by ``EMBEDDING_MODEL``."""
    # The model name comes from the config file, so it can be swapped there.
    encoder = SentenceTransformer(EMBEDDING_MODEL)
    return encoder
|
| 10 |
+
|
| 11 |
+
def build_vectorstore(chunks):
    """Embed *chunks* and store the vectors in a flat L2 FAISS index.

    Returns:
        A ``(model, index)`` pair: the embedding model that was used and the
        index holding one vector per chunk.

    Raises:
        ValueError: If *chunks* is empty.
    """
    if not chunks:
        raise ValueError("Chunks list is empty.")

    encoder = load_embedding_model()
    vectors = encoder.encode(chunks)

    # The index dimensionality is taken from the second axis of the
    # embedding matrix.
    store = faiss.IndexFlatL2(vectors.shape[1])
    store.add(np.array(vectors).astype("float32"))

    return encoder, store
|
| 23 |
+
|
| 24 |
+
def retrieve_chunks(query, model, index, chunks, k):
    """Return the chunks whose embeddings are closest to *query*.

    Args:
        query: The question text to embed and search for.
        model: Embedding model exposing ``encode``.
        index: Search index exposing ``search``; must not be ``None``.
        chunks: The chunk texts, in the same order they were added to *index*.
        k: Number of nearest chunks to request. A larger k gives the
            summarizer more material but makes the pipeline slower.

    Returns:
        A list of at most *k* chunks, in the order the index returned them.

    Raises:
        ValueError: If *index* is ``None``.
    """
    if index is None:
        raise ValueError("FAISS index has not been built.")

    query_embedding = model.encode([query])
    _distances, indices = index.search(
        np.array(query_embedding).astype("float32"),
        k,
    )

    # Only one query was sent, so the hits are in row 0. FAISS pads the row
    # with -1 when fewer than k vectors are available; without the filter,
    # chunks[-1] would silently return the last chunk for each padded slot.
    return [chunks[i] for i in indices[0] if i >= 0]
|