#!/usr/bin/env python3
# -*- coding: utf-8 -*-
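"""Gradio chat app that streams Spanish-language responses from Meta-Llama-3-8B-Instruct via the Hugging Face Inference API."""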
from huggingface_hub import InferenceClient
import gradio as gr
client = InferenceClient('meta-llama/Meta-Llama-3-8B-Instruct')
def funcion(prompt, historial):
    # Build the Llama 3 instruct prompt: system message (reply in Spanish, short paragraphs,
    # rich Markdown), then the previous turns, then the new user message.
    chat = '<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nResponde en español con párrafos cortos. Para una mejora de la lectura y comprensión utiliza un formato Markdown enriquecido con # encabezados, ## sub encabezados, **texto enfatizado**, listas, tablas, etc.<|eot_id|>'
    for usuario, chatbot in historial:
        chat += f'<|start_header_id|>user<|end_header_id|>\n\n{usuario}<|eot_id|>'
        chat += f'<|start_header_id|>assistant<|end_header_id|>\n\n{chatbot}<|eot_id|>'
    chat += f'<|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|>'
    chat += '<|start_header_id|>assistant<|end_header_id|>\n\n'  # cue the model to answer as the assistant
    kwargs = dict(
        temperature=0.80,
        max_new_tokens=2048,
        top_p=0.95,
        repetition_penalty=1.0,
        seed=1337
    )
    output = ''
    # Retry until at least one token comes back; transient Inference API errors
    # (e.g. the model still loading) are logged instead of being silently swallowed.
    while output == '':
        try:
            stream = client.text_generation(chat, **kwargs, stream=True, details=True, return_full_text=False)
            for response in stream:
                output += response.token.text.replace('<|eot_id|>', '')
                yield output
        except Exception as error:
            print(f'Retrying after error: {error}')
    return output
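
# Chat UI: Markdown rendering and copy button enabled; stop/retry/undo/clear buttons hidden.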
interfaz = gr.ChatInterface(
    fn=funcion,
    chatbot=gr.Chatbot(
        avatar_images=None,
        container=False,
        show_copy_button=True,
        layout='bubble',
        render_markdown=True,
        line_breaks=True
    ),
    css='h1 {font-size:22px;} h2 {font-size:20px;} h3 {font-size:18px;} h4 {font-size:16px;}',
    autofocus=True,
    fill_height=True,
    analytics_enabled=False,
    submit_btn='Chat',
    stop_btn=None,
    retry_btn=None,
    undo_btn=None,
    clear_btn=None
)
interfaz.launch()