import tempfile

import pytest

from stanza.models.common.utils import misc_to_space_after, space_after_to_misc
from stanza.models.constituency import tree_reader
from stanza.server import java_protobuf_requests
from stanza.tests import *
from stanza.utils.conll import CoNLL
from stanza.protobuf import DependencyGraph

# Module-level pytest marks: every test in this file is tagged with both
# the "travis" and the "pipeline" markers.
pytestmark = [pytest.mark.travis, pytest.mark.pipeline]

def check_tree(proto_tree, py_tree, py_score):
    """
    Decode proto_tree with from_tree and assert it matches the expected tree and score

    proto_tree: the protobuf tree to decode
    py_tree: the tree the decoded result must equal
    py_score: the score the decoded result must equal
    """
    decoded = java_protobuf_requests.from_tree(proto_tree)
    decoded_tree, decoded_score = decoded
    # the score is compared first, then the tree itself
    assert decoded_score == py_score
    assert decoded_tree == py_tree

def test_build_tree():
    """
    Test that each tree survives a round trip through build_tree and from_tree
    """
    text="((S (VP (VB Unban)) (NP (NNP Mox) (NNP Opal))))\n( (SBARQ (WHNP (WP Who)) (SQ (VP (VBZ sits) (PP (IN in) (NP (DT this) (NN seat))))) (. ?)))"
    trees = tree_reader.read_trees(text)
    assert len(trees) == 2

    for tree in trees:
        # Use the loop variable: indexing trees[0] here would check the
        # first tree twice and never exercise the second one.
        proto_tree = java_protobuf_requests.build_tree(tree, 1.0)
        check_tree(proto_tree, tree, 1.0)


# A tab-separated CoNLL-U sentence containing an empty node (id 5.1).
# The enhanced dependency column attaches most of the other words to that
# empty node, and the empty node itself is the enhanced root (0:root).
ESTONIAN_EMPTY_DEPS = """
# sent_id = ewtb2_000035_15
# text = Ja paari aasta pärast rôômalt maasikatele ...
1	Ja	ja	CCONJ	J	_	3	cc	5.1:cc	_
2	paari	paar	NUM	N	Case=Gen|Number=Sing|NumForm=Word|NumType=Card	3	nummod	3:nummod	_
3	aasta	aasta	NOUN	S	Case=Gen|Number=Sing	0	root	5.1:obl	_
4	pärast	pärast	ADP	K	AdpType=Post	3	case	3:case	_
5	rôômalt	rõõmsalt	ADV	D	Typo=Yes	3	advmod	5.1:advmod	Orphan=Yes|CorrectForm=rõõmsalt
5.1	panna	panema	VERB	V	VerbForm=Inf	_	_	0:root	Empty=5.1
6	maasikatele	maasikas	NOUN	S	Case=All|Number=Plur	3	obl	5.1:obl	Orphan=Yes
7	...	...	PUNCT	Z	_	3	punct	5.1:punct	_
""".strip()


def test_convert_networkx_graph():
    """
    Test converting a sentence with an empty node into a protobuf DependencyGraph

    The sample sentence has an empty node 5.1, which should show up in the
    graph as index 5 with emptyIndex 1 and be the source of the enhanced edges.
    """
    doc = CoNLL.conll2doc(input_str=ESTONIAN_EMPTY_DEPS, ignore_gapping=False)
    sentence = doc.sentences[0]

    graph = DependencyGraph()
    java_protobuf_requests.convert_networkx_graph(graph, sentence, 0)
    assert len(graph.rootNode) == 1
    assert graph.rootNode[0] == 0

    # regular words have emptyIndex 0; the empty node 5.1 is (5, 1)
    nodes = sorted((node.index, node.emptyIndex) for node in graph.node)
    expected_nodes = [(1,0), (2,0), (3,0), (4,0), (5,0), (5,1), (6,0), (7,0)]
    assert nodes == expected_nodes

    # only look at the edges which start at the empty node 5.1
    edges = sorted((edge.target, edge.dep)
                   for edge in graph.edge
                   if edge.source == 5 and edge.sourceEmpty == 1)
    expected_edges = [(1, 'cc'), (3, 'obl'), (5, 'advmod'), (6, 'obl'), (7, 'punct')]
    assert edges == expected_edges

# A tab-separated CoNLL-U sentence in which token 13 ("have") has a MISC
# value of SpacesAfter=\u00A0.  The backslash is doubled in this source so
# that the string holds the literal six-character escape, not an actual
# no-break space character.
ENGLISH_NBSP_SAMPLE="""
# sent_id = newsgroup-groups.google.com_n3td3v_e874a1e5eb995654_ENG_20060120_052200-0011
# text = Please note that neither the e-mail address nor name of the sender have been verified.
1	Please	please	INTJ	UH	_	2	discourse	_	_
2	note	note	VERB	VB	Mood=Imp|VerbForm=Fin	0	root	_	_
3	that	that	SCONJ	IN	_	15	mark	_	_
4	neither	neither	CCONJ	CC	_	7	cc:preconj	_	_
5	the	the	DET	DT	Definite=Def|PronType=Art	7	det	_	_
6	e-mail	e-mail	NOUN	NN	Number=Sing	7	compound	_	_
7	address	address	NOUN	NN	Number=Sing	15	nsubj:pass	_	_
8	nor	nor	CCONJ	CC	_	9	cc	_	_
9	name	name	NOUN	NN	Number=Sing	7	conj	_	_
10	of	of	ADP	IN	_	12	case	_	_
11	the	the	DET	DT	Definite=Def|PronType=Art	12	det	_	_
12	sender	sender	NOUN	NN	Number=Sing	7	nmod	_	_
13	have	have	AUX	VBP	Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin	15	aux	_	SpacesAfter=\\u00A0
14	been	be	AUX	VBN	Tense=Past|VerbForm=Part	15	aux:pass	_	_
15	verified	verify	VERB	VBN	Tense=Past|VerbForm=Part|Voice=Pass	2	ccomp	_	SpaceAfter=No
16	.	.	PUNCT	.	_	2	punct	_	_
""".strip()

def test_nbsp_doc():
    """
    Test that the space conversion methods will convert to and from NBSP
    """
    # Spell the no-break space as an escape.  A literal U+00A0 typed into
    # the source looks exactly like a plain space and is easily normalised
    # away by editors, which would silently change what is being tested.
    nbsp = "\u00A0"
    # the MISC encoding of a single NBSP: a literal backslash-u escape
    nbsp_misc = "SpacesAfter=\\u00A0"

    doc = CoNLL.conll2doc(input_str=ENGLISH_NBSP_SAMPLE)

    assert doc.sentences[0].text == "Please note that neither the e-mail address nor name of the sender have been verified."
    # tokens[12] is token 13 ("have"), the one with SpacesAfter in its MISC
    assert doc.sentences[0].tokens[12].spaces_after == nbsp
    assert misc_to_space_after(nbsp_misc) == nbsp
    assert space_after_to_misc(nbsp) == nbsp_misc

    # writing the document back out should reproduce the input exactly
    conllu = "{:C}".format(doc)
    assert conllu == ENGLISH_NBSP_SAMPLE