File size: 1,254 Bytes
1d8ed3b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import random
import streamlit as st


def query_collection(collection, query='', nresults=3, context_multiplier=2, sim_th=None):
    """Fetch documents matching ``query`` from ``collection``.

    The collection is asked for ``nresults * context_multiplier`` results
    and only the first entry of the returned 'documents' field is used.

    Note that the return type depends on ``sim_th``:

    * ``sim_th is None``: the documents are returned as-is (whatever
      sequence the collection produced).
    * otherwise: each distance ``d`` (first entry of the 'distances' field)
      is turned into a similarity ``1 - d``; documents whose similarity is
      at least ``sim_th`` are concatenated, with no separator, into a
      single string.
    """
    response = collection.query(
        query_texts=query,
        n_results=nresults * context_multiplier,
    )
    documents = response.get('documents')[0]

    # No threshold requested: hand back the raw document list.
    if sim_th is None:
        return documents

    # Convert distances to similarities before filtering.
    scores = [1 - distance for distance in response.get("distances")[0]]

    kept = []
    for document, score in zip(documents, scores):
        if score >= sim_th:
            kept.append(document)
    return ''.join(kept)


def get_chapter_context(chapters, chapter_number, n_questions):
    """Select random chunks of a chapter and store them in the session state.

    Up to ``n_questions`` distinct chunks are sampled without replacement
    from ``chapters[chapter_number]['chunks']`` and written, in sampled
    order, to ``st.session_state['chapter_selected_chunks']``. When the
    chapter has fewer chunks than ``n_questions``, all of them are used.
    The function returns nothing.

    Args:
        chapters: Container indexed by ``chapter_number``. Each entry is
            expected to hold a sized, indexable sequence under the
            'chunks' key.
        chapter_number: Index/key of the chapter inside ``chapters``.
        n_questions: Maximum number of chunks to select.

    Raises:
        ValueError: If the chapter entry is ``None``, has no 'chunks' key,
            or its 'chunks' sequence is empty.

    Lookup errors raised by ``chapters[chapter_number]`` itself (for
    example ``IndexError`` or ``KeyError``) are not intercepted.
    """
    chapter = chapters[chapter_number]

    # Validate before touching the entry, so a None chapter yields the
    # intended ValueError instead of an AttributeError.
    if chapter is None:
        raise ValueError(f"Chapter {chapter_number} not found in the chapters list.")
    if 'chunks' not in chapter:
        raise ValueError(f"Chapter {chapter_number} does not contain 'chunks' key.")

    chunks = chapter['chunks']
    n_chunks = len(chunks)
    if n_chunks == 0:
        raise ValueError(f"Chapter {chapter_number} has no chunks to process.")

    # Sample indices rather than the chunks themselves, so chunks do not
    # need to be hashable and duplicates by value remain distinct picks.
    chunks_indices = random.sample(range(n_chunks), min(n_questions, n_chunks))
    st.session_state['chapter_selected_chunks'] = [chunks[i] for i in chunks_indices]