Spaces:
Build error
Build error
Commit
·
4258e9c
1
Parent(s):
6ffc9b2
replaced tokenizer json
Browse files
app.py
CHANGED
|
@@ -7,11 +7,13 @@ import tree_sitter_languages # Pre-built parsers for multiple languages
|
|
| 7 |
from tokenizers import Tokenizer
|
| 8 |
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
| 9 |
|
|
|
|
| 10 |
tokenizer = Tokenizer.from_file("syntax_bpe_tokenizer.json") # New BPE tokenizer
|
| 11 |
model = tf.keras.models.load_model("crv3.keras") # CNN model
|
| 12 |
|
| 13 |
parser = Parser()
|
| 14 |
-
|
|
|
|
| 15 |
|
| 16 |
def syntax_aware_tokenize(code):
|
| 17 |
"""Tokenizes Java code using Tree-Sitter (AST-based)."""
|
|
|
|
| 7 |
from tokenizers import Tokenizer
|
| 8 |
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
| 9 |
|
| 10 |
+
|
| 11 |
tokenizer = Tokenizer.from_file("syntax_bpe_tokenizer.json") # New BPE tokenizer
|
| 12 |
model = tf.keras.models.load_model("crv3.keras") # CNN model
|
| 13 |
|
| 14 |
parser = Parser()
|
| 15 |
+
java_lang = tree_sitter_languages.get_language("java") # Get Java language object
|
| 16 |
+
parser.set_language(java_lang) # Set it for the parser
|
| 17 |
|
| 18 |
def syntax_aware_tokenize(code):
|
| 19 |
"""Tokenizes Java code using Tree-Sitter (AST-based)."""
|