# app.py
import os

import gradio as gr
# Inference goes through a hosted OpenAI-compatible API, so the OpenAI SDK
# and Gradio are the only runtime dependencies (no local transformers/torch).
from openai import OpenAI
# Alternative endpoint: the Ling API at https://bailingchat.alipay.com
# also speaks the OpenAI protocol.
client = OpenAI(
    # Read the key from the environment rather than hardcoding a secret.
    api_key=os.environ["SILICONFLOW_API_KEY"],
    base_url="https://api.siliconflow.cn/v1",
)
# Chat function: builds the prompt and streams the model's reply.
def chat(user_input, max_tokens=11264):
    # Chat history: a fixed system prompt plus the user's message.
    messages_template = [
        # {"role": "system", "content": "You are Ling, an assistant created by inclusionAI"},
        # System prompt (Chinese): "Who are you: I am Bailing (Ling), an AI assistant developed by Ant Group."
        {"role": "system", "content": "## 你是谁\n\n 我是百灵(Ling),一个由蚂蚁集团(Ant Group) 开发的AI智能助手"},
        {"role": "user", "content": user_input}
    ]
    response = client.chat.completions.create(
        model="inclusionAI/Ling-mini-2.0",
        messages=messages_template,
        max_tokens=max_tokens,
        temperature=0.7,
        presence_penalty=1.5,
        top_p=1,
        stream=True,
    )
    # Accumulate streamed deltas and yield the growing text so Gradio
    # re-renders the output on every chunk; no worker thread is needed.
    resp_text = ""
    for chunk in response:
        if chunk.choices[0].delta.content is not None:
            resp_text += chunk.choices[0].delta.content
            yield resp_text
    print(resp_text)
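# A quick standalone check of the streaming generator, kept commented out
# so this file only serves the UI (a sketch; assumes the SILICONFLOW_API_KEY
# environment variable is set):
#
#   for partial in chat("What is a large language model?", max_tokens=512):
#       pass  # `partial` grows by one streamed chunk per iteration
#   print(partial)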
# Create a custom layout using Blocks; the CSS turns the Markdown
# output into a fixed-height, scrollable panel.
with gr.Blocks(css="""
#markdown-output {
    height: 300px;
    overflow-y: auto;
    border: 1px solid #ddd;
    padding: 10px;
}
""") as demo:
    gr.Markdown(
        "## Ling-mini-2.0 AI Assistant\n"
        "Based on [inclusionAI/Ling-mini-2.0](https://huggingface.co/inclusionAI/Ling-mini-2.0)\n"
        # "Access through [Ling API](https://bailingchat.alipay.com)"
    )
    with gr.Row():
        max_tokens_slider = gr.Slider(minimum=5000, maximum=10000, step=100, label="Generated length")
    # output_box = gr.Textbox(lines=10, label="Response")
    # elem_id ties the component to the #markdown-output CSS rule above.
    output_box = gr.Markdown(label="Response", elem_id="markdown-output")
    input_box = gr.Textbox(lines=8, label="Input your question")
    examples = gr.Examples(
        examples=[
            ["Introducing the basic concepts of large language models"],
            ["How to solve long context dependencies in math problems?"]
        ],
        inputs=input_box
    )
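    # Clicking an example only fills input_box; generation is triggered by
    # the Submit control that gr.Interface wires up below (live=False).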
    # Wire the existing components to the chat generator; streamed
    # yields update the Markdown panel incrementally.
    interface = gr.Interface(
        fn=chat,
        inputs=[input_box, max_tokens_slider],
        outputs=output_box,
        live=False  # disable auto-triggering on input change
    )
# Launch the Gradio service; queue() is what allows generator
# (streaming) outputs to be served.
demo.queue()
demo.launch()
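# Run locally with `python app.py`; Gradio serves on
# http://127.0.0.1:7860 by default.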