# import evaluate
# bleu = evaluate.load("bleu")
# sacrebleu = evaluate.load("sacrebleu")
# rouge = evaluate.load("rouge")
# wer = evaluate.load("wer")
# import json
# from typing import List, Dict
# from nltk.translate.bleu_score import corpus_bleu
# rag_pred = ["To construct a benchmark dataset for early rumor detection (ERD), gather as many early relevant posts as possible from fact-checking websites, focusing on claims. A novel ERD model based on Neural Hawkes Processes can guide a generic rumor detection model to make timely, accurate, and stable predictions by constructing a detection stability distribution over expected future predictions based on prior and current predictions. This allows for an optimal time point to be fixed for detection without delay."]
# llm_pred = ["For constructing a benchmark dataset, consider diversity, representativeness, and time-sensitivity. Incorporate various social media platforms, rumor types, and linguistic styles. A novel model based on Neural Hawkes processes can enhance rumor detection by modeling the temporal dependencies among micro-events, capturing crucial patterns for early rumor detection, and thus improving accuracy and timeliness."]
# refs = [["The optimal approach for constructing a benchmark dataset for early rumor detection is to gather early relevant posts from fact-checking websites to capture the actual early-stage information. Additionally, a novel model based on Neural Hawkes processes, \"HEARD\", can improve the accuracy and timeliness of rumor detection by guiding generic rumor detection models to make timely and stable predictions."]]
# rag_sacrebleu_score = sacrebleu.compute(predictions=rag_pred, references=refs)
# llm_sacrebleu_score = sacrebleu.compute(predictions=llm_pred, references=refs)
# print(f"RAG BLEU: {rag_sacrebleu_score}\nLLM BLEU: {llm_sacrebleu_score}")
from pprint import pprint as print
# rel = [{'doc': 'predictive models especially when formula is an essential '
# 'differentiating part of a task conclusion future work we proposed an '
# 'adaptation of an nlp technique liu et al 2017 from the field of '
# 'machine comprehension to the area of mathematical educational data '
# 'mining we enrich the content representation by parsing mathematical '
# 'formulas into syntax trees and embedding them with neural networks '
# 'our experiments validate the approach using publicly available '
# 'datasets and show that incorporating syntactic information can '
# 'improve performance in predicting the difficulty of an exercise '
# 'these results suggest that the method may be of interest for '
# 'personalised learning solutions we',
# 'metadata': {'title': 'structural information in mathematical formulas for '
# 'exercise difficulty prediction a comparison of nlp '
# 'representations',
# 'url': 'https://aclanthology.org/2022.bea-1.14'},
# 'score': 0.2975524663925171},
# {'doc': 'monitoring validation loss with the patience of 3 epochs results we '
# 'compare data representations to investigate whether adding syntactic '
# 'sequences improves classification performance performance was '
# 'evaluated using 10fold stratified crossvalidation roc auc and is '
# 'shown in table 1 regarding the baselines majority and random '
# 'baselines produce roc auc of 05 on a single run and the best results '
# 'of logistic regression models trained on the length of input '
# 'sequences are 057 for math on descriptions and 066 for deepmind on '
# 'formula respectively regarding other possible neural approaches to '
# 'feature engineering using word2vec algorithm mikolov et al 2013 to '
# 'produce pretrained',
# 'metadata': {'title': 'structural information in mathematical formulas for '
# 'exercise difficulty prediction a comparison of nlp '
# 'representations',
# 'url': 'https://aclanthology.org/2022.bea-1.14'},
# 'score': 0.3195769786834717},
# {'doc': 'using reinforcement learning wang and jin 2019 adversarial learning '
# 'wang et al 2021b wang et al 2020b and also the multimodel structure '
# 'to handle the unknown entities in question answering wang et al 2018 '
# 'wang et al 2020a coreference understanding wang et al 2021a is also '
# 'another research direction in designing questionanswering systems '
# 'conclusion in this paper we introduce a novel mrpqa knowledge based '
# 'question answering system which can leverage information from mrps '
# 'to train our model we use a marginalized probability objective '
# 'function experimental results show that our model achieve strong '
# 'performance on popular kbqa datasets',
# 'metadata': {'title': 'a new concept of knowledge based question answering '
# 'kbqa system for multihop reasoning',
# 'url': 'https://aclanthology.org/2022.naacl-main.294'},
# 'score': 0.3206987977027893}]
# url = [result['metadata']['url'] for result in rel]
# context = []
# for result in rel:
# context.append(f'{result["doc"]}=={result["metadata"]["url"]}')
# print(context)
# from .chroma import search
# q = 'What is Retrieval Augmented Generation'
# results = search("What is Retrieval Augmented Generation", 3)
# if results:
# for result in results:
# print(result)
# else:
# print("No relevant documents found.")
# import os
# from .settings import MODELS_DIR
# from sentence_transformers import SentenceTransformer
# import numpy as np
# model = SentenceTransformer(os.path.join(MODELS_DIR, 'bge-large_finetuned'))
# embeddings: np.ndarray = model.encode(sentences=q, device='cpu', show_progress_bar=True)
# # print(embeddings)
import re
# Precompiled once at module level: the three intent labels the classifier
# prompt is expected to emit, matched as whole words, case-insensitively.
_INTENT_PATTERN = re.compile(r'\b(open-ended|query|out of scope)\b', re.IGNORECASE)
def extract_intent(text):
    """Extract the first recognized intent label from *text*.

    Scans the text for one of the known intent keywords
    ('open-ended', 'query', 'out of scope') as a whole word,
    ignoring case.

    Args:
        text (str): The text to search for an intent.

    Returns:
        str | None: The matched intent, normalized to lowercase,
        or None if no intent keyword is present.
    """
    match = _INTENT_PATTERN.search(text)
    # Normalize to lowercase so callers get a canonical label
    # regardless of how the model cased it in its response.
    return match.group(1).lower() if match else None
# Example usage: run the extractor over one sample response per intent
# class (plus one with no intent) and pretty-print each result.
# Expected output, in order: "open-ended", "query", "out of scope", None.
_samples = (
    "This is an open-ended question, so it should be classified as such.",
    "Please classify this query as a question.",
    "I am sorry, but this request is out of scope for me to handle.",
    "This is a completely unrelated response.",
)
for response in _samples:
    print(extract_intent(response))
|