import random

import streamlit as st


def query_collection(collection, query='', nresults=3, context_multiplier=2, sim_th=None):
    """Get relevant text from a collection for a given query.

    The collection is queried for ``nresults * context_multiplier`` hits.
    The hits are not trimmed back down to ``nresults`` afterwards.

    Args:
        collection: Object exposing ``query(query_texts=..., n_results=...)``
            and returning a mapping with a ``'documents'`` entry (and a
            ``'distances'`` entry when ``sim_th`` is used). Presumably a
            vector-store collection -- confirm against the caller.
        query: Query text forwarded as ``query_texts``.
        nresults: Base number of results to request.
        context_multiplier: Factor applied to ``nresults`` to get the number
            of results actually requested.
        sim_th: Optional similarity threshold. Similarity is computed as
            ``1 - distance``; only documents with similarity ``>= sim_th``
            are kept.

    Returns:
        When ``sim_th`` is ``None``: the list of documents for the first
        query, unfiltered. Otherwise: a single string made by concatenating
        the documents that pass the threshold, with no separator.
        NOTE(review): the return type differs between the two modes; callers
        must handle both.
    """
    query_result = collection.query(
        query_texts=query,
        n_results=nresults * context_multiplier,
    )
    # Index 0 selects the results belonging to the first (only) query text.
    docs = query_result.get('documents')[0]
    if sim_th is None:
        return docs

    distances = query_result.get("distances")[0]
    relevant_docs = [
        doc for doc, distance in zip(docs, distances)
        if 1 - distance >= sim_th
    ]
    return ''.join(relevant_docs)


def get_chapter_context(chapters, chapter_number, n_questions):
    """Pick random chunks from a chapter and store them in the session state.

    Up to ``n_questions`` distinct chunks are sampled from the chapter's
    ``'chunks'`` entry and written to
    ``st.session_state['chapter_selected_chunks']``. Nothing is returned.

    Args:
        chapters: Container indexed by ``chapter_number``; each entry is
            expected to be a mapping with a ``'chunks'`` sequence.
        chapter_number: Index/key of the chapter inside ``chapters``.
        n_questions: Maximum number of chunks to select; capped at the
            number of chunks available.

    Raises:
        ValueError: If the chapter entry is ``None``, has no ``'chunks'``
            key, or its ``'chunks'`` sequence is empty.
        IndexError / KeyError: Propagated from ``chapters[chapter_number]``
            when the chapter number is not present in ``chapters``.
    """
    chapter = chapters[chapter_number]
    # Validate before touching the chapter so a None entry yields the
    # intended ValueError instead of an AttributeError.
    if chapter is None:
        raise ValueError(f"Chapter {chapter_number} not found in the chapters list.")
    if 'chunks' not in chapter:
        raise ValueError(f"Chapter {chapter_number} does not contain 'chunks' key.")

    chunks = chapter['chunks']
    n_chunks = len(chunks)
    if n_chunks == 0:
        raise ValueError(f"Chapter {chapter_number} has no chunks to process.")

    # Sample indices without replacement; never ask for more than exist.
    chunks_indices = random.sample(range(n_chunks), min(n_questions, n_chunks))
    st.session_state['chapter_selected_chunks'] = [chunks[i] for i in chunks_indices]