Spaces:
Runtime error
Runtime error
File size: 1,923 Bytes
f4d16d5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
import os

import gradio as gr  # BUG FIX: the file uses the `gr.` alias throughout, but the original imported plain `gradio`
import litellm
from litellm import completion

# SECURITY FIX: the original hard-coded a live OpenRouter API key in source.
# Credentials must come from the environment, never from version control:
#   export OPENROUTER_API_KEY=sk-or-...
# (Rotate/revoke the leaked key immediately.)
if not os.environ.get("OPENROUTER_API_KEY"):
    print("WARNING: OPENROUTER_API_KEY is not set; OpenRouter API calls will fail.")
def inference(message, history):
    """Stream a chat completion for *message*, using *history* as context.

    Parameters
    ----------
    message : str
        The latest user message.
    history : list[list[str]]
        Gradio chat history as [user, bot] pairs; a pending bot reply may
        appear as ``None``.

    Yields
    ------
    str
        The partial assistant reply, growing as streamed chunks arrive.
        On any error, a single user-facing error string is yielded instead.
    """
    try:
        # Flatten the [user, bot] pairs into one context string, skipping
        # None placeholders Gradio inserts while a reply is still pending
        # (the original crashed in " ".join when one was present).
        flattened_history = [
            turn for pair in history for turn in pair if turn is not None
        ]
        full_message = " ".join(flattened_history + [message])
        messages_litellm = [{"role": "user", "content": full_message}]  # litellm message format

        partial_message = ""
        # BUG FIX: the original called `litellm.completion` although only
        # `completion` was imported (`from litellm import completion`),
        # raising NameError at runtime (masked by the broad except below).
        for chunk in completion(
            model="openrouter/meta-llama/llama-2-13b-chat",
            # NOTE(review): api_base has no scheme (http://) — confirm litellm
            # accepts a bare host:port for this provider.
            api_base="10.213.21.138:56928",
            messages=messages_litellm,
            # BUG FIX: litellm's standard parameter is `max_tokens`;
            # `max_new_tokens` is not part of its unified input spec.
            max_tokens=512,
            temperature=0.7,
            top_k=100,
            top_p=0.9,
            repetition_penalty=1.18,
            stream=True,
        ):
            # The final streamed chunk may carry no content (delta content is
            # None or absent) — guard before concatenating.
            delta = chunk['choices'][0]['delta'].get('content')
            if delta:
                partial_message += delta  # extract text from streamed litellm chunks
                yield partial_message
    except Exception as e:
        print("Exception encountered:", str(e))
        yield "An error occurred. Please press 'Clear' and try your question again."
# Model identifier shown in the UI description; keep in sync with the model
# passed to litellm inside inference().
model_name = "openrouter/meta-llama/llama-2-13b-chat"

# BUG FIX: the original referenced two undefined names — `model_name` and
# `theme` — raising NameError before the app could launch. `model_name` is
# now defined above; the dead `theme=theme` kwarg is dropped so Gradio's
# default theme applies.
gr.ChatInterface(
    inference,
    chatbot=gr.Chatbot(height=400),
    textbox=gr.Textbox(placeholder="Enter text here...", container=False, scale=5),
    description=f"""
CURRENT PROMPT TEMPLATE: {model_name}.
An incorrect prompt template will cause performance to suffer.
Check the API specifications to ensure this format matches the target LLM.""",
    title="Simple Chatbot Test Application",
    examples=["Define 'deep learning' in one sentence."],
    retry_btn="Retry",
    undo_btn="Undo",
    clear_btn="Clear",
).queue().launch()