Spaces:
Runtime error
Runtime error
| # Copyright (c) Microsoft Corporation. | |
| # Licensed under the MIT license. | |
| import re | |
| from io import StringIO | |
| import tokenize | |
def remove_comments_and_docstrings(source, lang):
    """Return ``source`` with comments (and, for Python, docstrings) removed.

    Behaviour depends on ``lang``:

    * ``'python'`` -- the source is tokenized with :mod:`tokenize`; COMMENT
      tokens are dropped, and STRING tokens are dropped when they look like
      docstrings (see the heuristic below). Lines that end up blank are
      removed.
    * ``'ruby'`` -- the source is returned unchanged.
    * anything else -- treated as a C-like language: ``// ...`` and
      ``/* ... */`` comments are each replaced by a single space, quoted
      literals are left intact, and lines that end up blank are removed.

    Args:
        source: The program text to clean.
        lang: Language name selecting one of the strategies above.

    Returns:
        The cleaned source as a single string, without a trailing newline
        (except for ``'ruby'``, where ``source`` is returned as-is).

    Exceptions raised by :mod:`tokenize` on Python input it cannot tokenize
    are not caught here and propagate to the caller.
    """
    if lang in ['python']:
        pieces = []
        # Starting from INDENT makes a string that is the very first token
        # (a module docstring) count as a docstring.
        prev_toktype = tokenize.INDENT
        last_lineno = -1
        last_col = 0
        for tok in tokenize.generate_tokens(StringIO(source).readline):
            token_type = tok[0]
            token_string = tok[1]
            start_line, start_col = tok[2]
            end_line, end_col = tok[3]
            # A token on a new line is measured from column 0 again.
            if start_line > last_lineno:
                last_col = 0
            # Re-create the horizontal gap between consecutive tokens.
            if start_col > last_col:
                pieces.append(" " * (start_col - last_col))
            if token_type == tokenize.COMMENT:
                # Comments are dropped entirely.
                pass
            elif token_type == tokenize.STRING:
                # Docstring heuristic: a string is kept only if it does not
                # directly follow an INDENT or NEWLINE token and does not
                # start at column 0.
                if (prev_toktype != tokenize.INDENT
                        and prev_toktype != tokenize.NEWLINE
                        and start_col > 0):
                    pieces.append(token_string)
            else:
                pieces.append(token_string)
            prev_toktype = token_type
            last_col = end_col
            last_lineno = end_line
        cleaned = "".join(pieces)
        return '\n'.join(
            line for line in cleaned.split('\n') if line.strip() != ""
        )
    elif lang in ['ruby']:
        return source
    else:
        def replacer(match):
            text = match.group(0)
            if text.startswith('/'):
                # A comment matched: substitute a space (not an empty
                # string) so the surrounding tokens stay separated.
                return " "
            # A quoted literal matched: keep it untouched.
            return text

        # Alternatives, in order: line comment, block comment,
        # single-quoted literal, double-quoted literal.
        pattern = re.compile(
            r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
            re.DOTALL | re.MULTILINE
        )
        cleaned = re.sub(pattern, replacer, source)
        return '\n'.join(
            line for line in cleaned.split('\n') if line.strip() != ""
        )
def tree_to_token_index(root_node):
    """Collect the (start_point, end_point) span of every token under a node.

    A node counts as a token when it has no children, or when its type is
    one of ``'string_literal'``, ``'string'`` or ``'character_literal'``
    (such nodes are not descended into). Nodes of type ``'comment'`` are
    never reported as tokens.

    The tree is walked with an explicit stack rather than recursion, so
    deeply nested trees do not hit Python's recursion limit.

    Args:
        root_node: A node exposing ``children``, ``type``, ``start_point``
            and ``end_point`` attributes.
            NOTE(review): presumably a tree-sitter node -- confirm with callers.

    Returns:
        A list of ``(start_point, end_point)`` tuples in depth-first,
        left-to-right order.
    """
    spans = []
    pending = [root_node]
    while pending:
        node = pending.pop()
        is_token = (
            len(node.children) == 0
            or node.type in ['string_literal', 'string', 'character_literal']
        ) and node.type != 'comment'
        if is_token:
            spans.append((node.start_point, node.end_point))
        else:
            # Push children reversed so the leftmost child is popped first,
            # giving the same order as a recursive pre-order walk.
            pending.extend(reversed(node.children))
    return spans
def tree_to_variable_index(root_node, index_to_code):
    """Collect token spans whose node type differs from their source text.

    Tokens are identified as follows: a node with no children, or of type
    ``'string_literal'``, ``'string'`` or ``'character_literal'``, and not
    of type ``'comment'``. For each token, the text recorded in
    ``index_to_code`` is compared with the node's type; the span is kept
    only when they differ.
    NOTE(review): this presumably filters out keywords and punctuation,
    whose node type equals their text -- confirm against the grammar in use.

    The tree is walked with an explicit stack rather than recursion, so
    deeply nested trees do not hit Python's recursion limit.

    Args:
        root_node: A node exposing ``children``, ``type``, ``start_point``
            and ``end_point`` attributes.
        index_to_code: Mapping from ``(start_point, end_point)`` to a
            2-item sequence whose second item is the token's text.

    Returns:
        A list of ``(start_point, end_point)`` tuples in depth-first,
        left-to-right order.

    Raises:
        KeyError: If a token's span is missing from ``index_to_code``.
    """
    spans = []
    pending = [root_node]
    while pending:
        node = pending.pop()
        is_token = (
            len(node.children) == 0
            or node.type in ['string_literal', 'string', 'character_literal']
        ) and node.type != 'comment'
        if is_token:
            index = (node.start_point, node.end_point)
            _, code = index_to_code[index]
            if node.type != code:
                spans.append(index)
        else:
            # Push children reversed so the leftmost child is popped first,
            # giving the same order as a recursive pre-order walk.
            pending.extend(reversed(node.children))
    return spans
def index_to_code_token(index, code):
    """Return the text of ``code`` covered by the span ``index``.

    Args:
        index: A ``(start_point, end_point)`` pair, each point being a
            ``(row, column)`` pair; the end column is exclusive.
        code: A sequence of source lines indexed by row.

    Returns:
        The slice of a single line when both points share a row; otherwise
        the tail of the first row, every row strictly in between, and the
        head of the last row, concatenated with no separator.
    """
    (first_row, first_col), (last_row, last_col) = index[0], index[1]
    if first_row == last_row:
        return code[first_row][first_col:last_col]
    parts = [code[first_row][first_col:]]
    parts.extend(code[row] for row in range(first_row + 1, last_row))
    parts.append(code[last_row][:last_col])
    return "".join(parts)