# Spaces: Configuration error
# Haystack pipeline configuration.
#
# Declares the components (document store, two retrievers, a joiner, a reader,
# file converters and a preprocessor) and wires them into two pipelines:
#   - query:    runs both retrievers, joins their results, then runs the reader
#   - indexing: converts files, splits them, embeds them and stores them

# NOTE(review): "ignore" presumably skips the Haystack version compatibility
# check for this file - confirm against the Haystack version in use.
version: ignore

components:
  - name: DocumentStore
    type: ElasticsearchDocumentStore
    params:
      host: localhost

  # Selects the most relevant documents from the document store and passes
  # them on to the Reader (via the Joiner in the query pipeline).
  - name: Retriever
    # Uses a Transformer model to encode the document and the query
    type: EmbeddingRetriever
    params:
      document_store: DocumentStore
      # Alternative model: multi-qa-MiniLM-L6-dot-v1
      embedding_model: sentence-transformers/multi-qa-mpnet-base-dot-v1
      embed_meta_fields:
        - filename
      top_k: 10  # The number of results to return

  - name: BM25
    type: BM25Retriever
    params:
      document_store: DocumentStore
      top_k: 10

  # Merges the result lists of the two retrievers in the query pipeline.
  - name: Joiner
    type: JoinDocuments
    params:
      join_mode: reciprocal_rank_fusion

  # The component that actually fetches answers from among the documents
  # handed over by the Joiner (each retriever contributes up to 10, see top_k).
  - name: Reader
    # Transformer-based reader, specializes in extractive QA
    type: FARMReader
    params:
      # Alternative model: dmis-lab/biobert-base-cased-v1.1-squad
      model_name_or_path: dmis-lab/biobert-large-cased-v1.1-squad
      context_window_size: 700  # The size of the window around the answer span

  # Routes files based on their extension to appropriate converters,
  # by default txt, pdf, md, docx, html
  - name: FileTypeClassifier
    type: FileTypeClassifier

  - name: TextConverter  # Converts files into documents
    type: TextConverter

  - name: PDFConverter  # Converts PDFs into documents
    type: PDFToTextConverter

  # Splits documents into smaller ones and cleans them up
  - name: Preprocessor
    type: PreProcessor
    params:
      # With a vector-based retriever, it's good to split your documents
      # into smaller ones
      split_by: word  # The unit by which you want to split the documents
      split_length: 250  # The max number of words in a document
      split_overlap: 20  # Enables the sliding window approach
      # Retains complete sentences in split documents
      split_respect_sentence_boundary: true
      # Used by NLTK to best detect the sentence boundaries for that language
      language: en

# Here you define how the nodes are organized in the pipelines
# For each node, specify its input
pipelines:
  - name: query
    nodes:
      - name: Retriever
        inputs: [Query]
      - name: BM25
        inputs: [Query]
      - name: Joiner
        inputs: [Retriever, BM25]
      - name: Reader
        inputs: [Joiner]

  - name: indexing
    nodes:
      # Depending on the file type, we use a Text or PDF converter
      - name: FileTypeClassifier
        inputs: [File]
      - name: TextConverter
        # Ensures this converter receives TXT files
        inputs: [FileTypeClassifier.output_1]
      - name: PDFConverter
        # Ensures this converter receives PDFs
        inputs: [FileTypeClassifier.output_2]
      - name: Preprocessor
        inputs: [TextConverter, PDFConverter]
      - name: Retriever
        inputs: [Preprocessor]
      - name: DocumentStore
        inputs: [Retriever]