Spaces:
Sleeping
Sleeping
| import spacy | |
| from spacy.tokens import Doc | |
| import coreferee | |
| # Load spaCy model | |
| nlp = spacy.load('en_core_web_sm') | |
| nlp.add_pipe("coreferee") | |
| # Register the custom extension attribute | |
| Doc.set_extension('resolved_text', default=None, force=True) | |
| def resolve_coreferences(query_text, conversation_history): | |
| """ | |
| Resolve coreferences in the given text using spaCy and coreferee. | |
| Args: | |
| query_text (str): The current query to resolve | |
| conversation_history (list): List of dictionaries containing previous conversation turns | |
| Returns: | |
| str: Text with resolved coreferences | |
| """ | |
| # Combine conversation history and current query | |
| combined_text = [] | |
| for turn in conversation_history: | |
| combined_text.append(f"User: {turn['user']}") | |
| combined_text.append(f"Bot: {turn['Bot']}") | |
| combined_text.append(f"User: {query_text}") | |
| text = "\n".join(combined_text) | |
| # Process the text | |
| doc = nlp(text) | |
| # Get all tokens and their potential antecedents | |
| resolved_tokens = list(doc) | |
| # Resolve coreferences | |
| for chain in doc._.coref_chains: | |
| for mention in chain: | |
| if mention.root_index != chain.most_specific.root_index: | |
| # Replace mention with its antecedent | |
| resolved_tokens[mention.root_index] = doc[chain.most_specific.root_index] | |
| # Reconstruct the text with resolved references | |
| resolved_text = "".join([token.text_with_ws if isinstance(token, spacy.tokens.Token) | |
| else token.text + " " for token in resolved_tokens]) | |
| # Extract the resolved query (last line) | |
| resolved_query = resolved_text.split('\n')[-1].replace("User: ", "").strip() | |
| return resolved_query |