# NOTE(review): the lines below were Hugging Face Spaces page residue
# ("Spaces: Sleeping") captured by a scrape — preserved here as a comment
# so the file parses as Python.
import gradio as gr
import os

# NOTE(review): installing packages at runtime is fragile — `openai` belongs in
# requirements.txt. The shell-out is kept only as a fallback for environments
# where the package is genuinely missing, instead of running on every start.
try:
    from openai import OpenAI
except ImportError:
    os.system('pip install openai')
    from openai import OpenAI

# Client pointed at the Hugging Face Inference API's OpenAI-compatible endpoint.
# The token is read from the YOUR_API_TOKEN environment variable (may be None
# if unset — requests will then fail with an auth error).
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1",
    api_key=os.environ.get('YOUR_API_TOKEN'),
)
def predict(message, history, test=""):
    """Stream a chat completion for *message*, given prior *history*.

    Parameters
    ----------
    message : str
        The user's latest message.
    history : list
        Prior (user, assistant) exchange pairs as supplied by
        ``gr.ChatInterface`` in its tuple format.
    test : str
        Unused; kept for backward compatibility with existing callers.

    Yields
    ------
    str
        The assistant reply so far, growing as tokens stream in, which lets
        the Gradio UI render the response incrementally.
    """
    # Convert Gradio's pair-based history into the OpenAI messages schema.
    history_openai_format = []
    for human, assistant in history:
        history_openai_format.append({"role": "user", "content": human})
        history_openai_format.append({"role": "assistant", "content": assistant})
    history_openai_format.append({"role": "user", "content": message})

    response = client.chat.completions.create(
        model='meta-llama/Meta-Llama-3-8B-Instruct',
        messages=history_openai_format,
        temperature=0.7,
        stream=True,
        max_tokens=3000,
    )

    # Accumulate streamed deltas; some chunks carry a None delta (e.g. the
    # final chunk), which must be skipped.
    partial_message = ""
    for chunk in response:
        delta = chunk.choices[0].delta.content
        if delta is not None:
            partial_message += delta
            yield partial_message
| gr.ChatInterface(predict).launch() |