| | import gradio as gr |
| | from huggingface_hub import InferenceClient |
| | from transformers import AutoTokenizer, AutoModelForCausalLM |
| | import torch |
| |
|
| | |
# Remote inference client bound to the hosted checkpoint; `respond` streams
# from it when the local-model option is off.
client = InferenceClient(model="alpindale/WizardLM-2-8x22B")
| |
|
| | |
# Local copy of the checkpoint, loaded once at import time. `respond` reads
# `tokenizer` and `model` when its `use_local_model` flag is true.
model_path = "alpindale/WizardLM-2-8x22B"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto")
# Switch to evaluation mode (the module is modified in place).
model.eval()
| |
|
def _format_prompt(messages):
    """Render chat messages as ``role: content`` lines ending with an assistant cue."""
    lines = [f"{m['role']}: {m['content']}" for m in messages]
    # The bare trailing "assistant:" marks the assistant's turn as the text to
    # be continued, so the reply does not have to be located by searching the
    # output for a marker the model may or may not emit.
    lines.append("assistant:")
    return "\n".join(lines)


def _clean_reply(text):
    """Trim whitespace and drop a leading ``assistant:`` marker, if present."""
    return text.strip().removeprefix("assistant:").strip()


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    use_local_model: bool,
):
    """Generate the assistant's reply for a chat turn.

    Args:
        message: The new user message.
        history: Earlier turns as ``(user, assistant)`` pairs; empty halves
            of a pair are skipped.
        system_message: Text placed first in the prompt under the ``system`` role.
        max_tokens: Upper bound on newly generated tokens (converted to ``int``).
        temperature: Sampling temperature forwarded to the backend.
        top_p: Nucleus-sampling threshold forwarded to the backend.
        use_local_model: When true, generate with the module-level ``model`` /
            ``tokenizer``; otherwise stream from the module-level ``client``.

    Yields:
        The reply text. The local path yields once with the full reply; the
        remote path yields the accumulated reply after every streamed chunk.
    """
    messages = [{"role": "system", "content": system_message}]
    for user, assistant in history:
        if user:
            messages.append({"role": "user", "content": user})
        if assistant:
            messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})

    # Built once and shared by both backends.
    prompt = _format_prompt(messages)
    # UI sliders may deliver floats; the token budget must be an integer.
    max_new_tokens = int(max_tokens)

    if use_local_model:
        encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
        input_ids = encoded["input_ids"]
        prompt_length = input_ids.shape[-1]

        with torch.no_grad():
            output = model.generate(
                input_ids,
                # Explicit mask: pad and eos share an id below, so padding
                # cannot be inferred from the ids alone.
                attention_mask=encoded["attention_mask"],
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
            )

        # The output sequence starts with the prompt tokens; decode only what
        # was generated after them so the prompt is never echoed to the user.
        completion = tokenizer.decode(
            output[0][prompt_length:], skip_special_tokens=True
        )
        yield _clean_reply(completion)
        return

    response = ""
    for chunk in client.text_generation(
        prompt,
        max_new_tokens=max_new_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        response += chunk
        # Yield the running text so the UI can update as chunks arrive.
        yield _clean_reply(response)
| |
|
| | |
# Chat UI definition. The widgets in `additional_inputs` are listed in the same
# order as the extra parameters of `respond` (system_message, max_tokens,
# temperature, top_p, use_local_model).
# The Polish labels below were stored mis-encoded (UTF-8 read as a CJK code
# page); they are restored to the intended text.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(
            value="Odpowiadasz w języku polskim. Jesteś Coder/Developer/Programista i tworzysz pełny kod.",
            label="Wiadomość systemowa",
        ),
        gr.Slider(
            minimum=1,
            maximum=2048,
            value=2048,
            step=1,
            label="Maksymalna liczba nowych tokenów",
        ),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperatura"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (próbkowanie nucleus)",
        ),
        gr.Checkbox(label="Użyj lokalnego modelu", value=False),
    ],
    title="Zaawansowany interfejs czatu AI",
    description="Czatuj z modelem AI, korzystając z Hugging Face Inference API lub lokalnego modelu.",
)
| |
|
def main():
    """Launch the Gradio chat interface."""
    demo.launch()


# Only start the server when the file is executed as a script.
if __name__ == "__main__":
    main()