egmaminta's picture
Updated app description
06b37e4
raw
history blame
2.54 kB
from transformers import AutoTokenizer, AutoModelWithLMHead, SummarizationPipeline
import gradio as gr
import tokenize
import io
# Hub checkpoint id shared by the model weights and the tokenizer.
_CHECKPOINT = "SEBIS/code_trans_t5_large_source_code_summarization_python_multitask_finetune"

# Built once at import time so every request reuses the same loaded objects.
# The model is loaded before the tokenizer, as in the original call.
_summarizer_model = AutoModelWithLMHead.from_pretrained(_CHECKPOINT)
_summarizer_tokenizer = AutoTokenizer.from_pretrained(
    _CHECKPOINT,
    skip_special_tokens=True,
)

# NOTE(review): device=-1 is the transformers convention for "run on CPU" --
# confirm against the installed transformers version.
pipeline = SummarizationPipeline(
    model=_summarizer_model,
    tokenizer=_summarizer_tokenizer,
    device=-1,
)
def code_summarizer(code):
    """Summarize a Python snippet in natural language.

    The snippet is first flattened into a space-separated token string
    (comments dropped, string literals replaced by ``CODE_STRING`` and
    numeric literals by ``CODE_INTEGER``) and that string is handed to the
    module-level ``pipeline``.

    Args:
        code: Python source text typed by the user. May be empty, ``None``
            or syntactically incomplete.

    Returns:
        The ``summary_text`` of the first pipeline result, or ``''`` when
        the input contains no tokens to summarize.
    """

    def code_tokenizer(source):
        """Return *source* as a single space-joined string of tokens."""
        tokens = []
        readline = io.StringIO(source).readline
        try:
            for toktype, tok, _start, _end, _src_line in tokenize.generate_tokens(readline):
                if toktype == tokenize.COMMENT:
                    continue
                if toktype == tokenize.STRING:
                    tokens.append("CODE_STRING")
                elif toktype == tokenize.NUMBER:
                    tokens.append("CODE_INTEGER")
                elif tok.strip():
                    # Whitespace-only tokens (NEWLINE/NL, INDENT, DEDENT,
                    # ENDMARKER) carry no content; skipping them keeps the
                    # joined string free of blank entries.
                    tokens.append(tok)
        except (tokenize.TokenError, SyntaxError):
            # Deliberate best effort: half-typed or invalid code makes the
            # tokenizer raise (IndentationError is a SyntaxError). Keep the
            # tokens collected before the failure instead of crashing.
            pass
        return ' '.join(tokens)

    tokenized_code = code_tokenizer(code or '')
    if not tokenized_code:
        # Nothing but whitespace/comments: skip the model call entirely.
        return ''
    summary = pipeline(tokenized_code)
    return summary[0]['summary_text']
def call_examples():
    """Return the sample snippets offered as examples in the interface.

    Returns:
        A new list in which every snippet is wrapped in its own
        single-item list.
    """
    snippets = (
        'def findAverage(list): sum = 0. for x in list: sum = sum + x average = sum / len(list) return average',
        'def findMax(list): max = list[0] for x in list: if x > max: max = x return max',
        'def findRange(list): return max(list)-min(list)',
    )
    return [[snippet] for snippet in snippets]
# Input component: a five-line textbox that starts empty.
_code_input = gr.inputs.Textbox(
    lines=5,
    default='',
    placeholder='Insert a Python code here',
    label='PYTHON CODE',
)

# Output component: a textbox showing the generated summary.
_summary_output = gr.outputs.Textbox(
    type='auto',
    label='CODE SUMMARY',
)

_example_rows = call_examples()

# Long-form text passed to the interface as its article.
_ARTICLE = (
    'Pretrained model on programming language python using the t5 large model architecture. '
    'This model is trained on tokenized python code functions: it works best with tokenized python functions. '
    'This CodeTrans model is based on the t5-large model. '
    'It has its own SentencePiece vocabulary model. '
    'It used multi-task training on 13 supervised tasks in the software development domain and 7 unsupervised datasets. '
    'It is then fine-tuned on the source code summarization task for the python code snippets.'
)

_demo = gr.Interface(
    fn=code_summarizer,
    inputs=_code_input,
    outputs=_summary_output,
    title='Code Summarizer From CodeTrans',
    description='Summarize any Python code',
    article=_ARTICLE,
    theme='dark-peach',
    live=True,
    examples=_example_rows,
    allow_flagging='never',
)

# Start the web app at import time, as the original one-liner did.
_demo.launch(inbrowser=True)