TRaw committed on
Commit
f4d16d5
·
1 Parent(s): 5ad3123

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -0
app.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio
2
+ import os
3
+ from litellm import completion
4
# SECURITY: API keys must never be committed to source control. The key that
# used to be hard-coded on this line has been exposed publicly and should be
# revoked. The key is expected in the OPENROUTER_API_KEY environment variable
# (for example, configured as a deployment/Space secret) instead.
if not os.environ.get("OPENROUTER_API_KEY"):
    # Warn instead of raising so the UI can still start; requests will fail
    # and surface the user-facing error message until the key is provided.
    print("Warning: OPENROUTER_API_KEY is not set; completion requests will fail.")
5
+
6
def inference(message, history):
    """Stream a model reply for ``message``, yielding the text accumulated so far.

    Args:
        message: The latest user input.
        history: Earlier turns as an iterable of ``[user, assistant]`` pairs.
            Entries that are ``None`` or empty are ignored.

    Yields:
        The partial reply after each streamed chunk that carries text. If
        anything fails, a single user-facing error message is yielded instead.
    """
    try:
        # Flatten the pairs into one list of turns, dropping None/empty
        # entries so that str.join cannot fail on them.
        prior_turns = [turn for pair in history for turn in pair if turn]
        full_message = " ".join(prior_turns + [message])
        messages_litellm = [{"role": "user", "content": full_message}]  # litellm message format

        # `completion` is the name the file imports with
        # `from litellm import completion`; the module name `litellm` itself
        # is never bound, so it must not be referenced here.
        stream = completion(
            model="openrouter/meta-llama/llama-2-13b-chat",
            api_base="10.213.21.138:56928",
            messages=messages_litellm,
            max_tokens=512,
            temperature=.7,
            top_k=100,
            top_p=.9,
            repetition_penalty=1.18,
            stream=True,
        )

        partial_message = ""
        for chunk in stream:
            # Streamed chunks may carry no text (content is None, typically
            # on the final chunk); skip those instead of concatenating None.
            delta_text = chunk['choices'][0]['delta']['content']
            if delta_text:
                partial_message += delta_text
                yield partial_message
    except Exception as e:
        # UI boundary: log the cause and show a generic message to the user.
        print("Exception encountered:", str(e))
        yield "An Error occured please 'Clear' the error and try your question again"
26
+
27
# Label interpolated into the UI description; mirrors the model string that
# inference() requests.
model_name = "openrouter/meta-llama/llama-2-13b-chat"
# None lets Gradio fall back to its default theme.
theme = None

# Build the chat UI around the streaming inference() generator and serve it.
# The file imports the package as `gradio` (no `gr` alias), so components are
# referenced through that name.
gradio.ChatInterface(
    inference,
    chatbot=gradio.Chatbot(height=400),
    textbox=gradio.Textbox(placeholder="Enter text here...", container=False, scale=5),
    description=f"""
    CURRENT PROMPT TEMPLATE: {model_name}.
    An incorrect prompt template will cause performance to suffer.
    Check the API specifications to ensure this format matches the target LLM.""",
    title="Simple Chatbot Test Application",
    examples=["Define 'deep learning' in once sentence."],
    retry_btn="Retry",
    undo_btn="Undo",
    clear_btn="Clear",
    theme=theme,
).queue().launch()