brainsqueeze's picture
Update ES sources config
64b9f1e verified
raw
history blame
2.22 kB
from ask_candid.base.retrieval.schemas import ElasticSourceConfig
# Elasticsearch source settings for the "search-semantic-blog" index.
CandidBlogConfig = ElasticSourceConfig(
    index_name="search-semantic-blog",
    semantic_fields=(
        "semantic_title_summary_tags_text",
        "semantic_authors_text",
        "semantic_content",
    ),
    text_fields=(
        "title",
        "summary",
        "content",
        "authors_text",
    ),
    highlight_fields=("semantic_content",),
    # NOTE(review): "content" is also listed in text_fields above; presumably
    # it is still searched but dropped from returned documents -- confirm
    # against ElasticSourceConfig's consumers.
    excluded_fields=("content",),
)
# Elasticsearch source settings for the "search-semantic-help" index.
CandidHelpConfig = ElasticSourceConfig(
    index_name="search-semantic-help",
    semantic_fields=(
        "semantic_content",
        "semantic_title_summary_question_category",
    ),
    text_fields=(
        "title",
        "summary",
        "content_question",
    ),
    highlight_fields=("semantic_content",),
    excluded_fields=(
        "content_html",
        "content",
    ),
)
# Elasticsearch source settings for the "search-semantic-learning" index.
CandidLearningConfig = ElasticSourceConfig(
    index_name="search-semantic-learning",
    semantic_fields=(
        "semantic_title_short_description",
        "semantic_lessons_description",
        "semantic_lessons_content",
    ),
    text_fields=(
        "title",
        "short_description",
        "lesson_list.description",
        # Spelled to match the "lesson_content.*" paths used in
        # excluded_fields below; this entry previously read
        # "lessson_content.content" (three s's), which does not match them.
        "lesson_content.content",
    ),
    highlight_fields=("semantic_lessons_content",),
    excluded_fields=(
        "lesson_content.content_html",
        "lesson_list.description_html",
        "semantic_lessons_content",
        "semantic_lessons_description",
        "lesson_content.content",
        "lesson_list.description",
    ),
)
# Elasticsearch source settings for the "news_1" index; only the index name
# and semantic fields are set here, every other option is left unspecified.
CandidNewsConfig = ElasticSourceConfig(
    index_name="news_1",
    semantic_fields=(
        "title",
        "content",
    ),
)
# IssueLabConfig = ElasticSourceConfig(
# index_name="search-semantic-issuelab-elser_ve2",
# semantic_fields=("description", "content", "combined_issuelab_findings", "combined_item_description")
# )
# Elasticsearch source settings for the "issuelab_prod_data" index.
IssueLabConfig = ElasticSourceConfig(
    index_name="issuelab_prod_data",
    # Unweighted alternative, kept for reference:
    # semantic_fields=("title", "description", "content"),
    # NOTE(review): the "^0.3" suffix looks like an Elasticsearch per-field
    # boost that down-weights "content" -- confirm against the query builder.
    semantic_fields=(
        "title",
        "description",
        "content^0.3",
    ),
    highlight_fields=(
        "description",
        "content",
    ),
)
# Elasticsearch source settings for the "search-semantic-youtube" index.
YoutubeConfig = ElasticSourceConfig(
    index_name="search-semantic-youtube",
    semantic_fields=(
        "semantic_title",
        "semantic_description",
        "semantic_cc_text",
    ),
    text_fields=(
        "title",
        "description",
        "cc_text",
    ),
    highlight_fields=("semantic_cc_text",),
    excluded_fields=(
        "cc_text",
        "semantic_cc_text",
        "semantic_title",
    ),
)