import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr
import whisper
from TTS.api import TTS
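
# Assumed dependencies (not pinned in the original): torch, transformers,
# accelerate (for device_map="auto"), gradio (3.x Audio API), openai-whisper,
# and Coqui TTS, e.g.:
#   pip install torch transformers accelerate gradio openai-whisper TTS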
# 🧠 Load the language model (e.g. Mistral)
model_name = "mistralai/Mistral-7B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16
)
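# Note: float16 weights for a 7B model take roughly 14 GB; with accelerate,
# device_map="auto" offloads layers to CPU when the GPU is too small.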
# 🎤 Speech-to-text model (OpenAI Whisper)
stt_model = whisper.load_model("base")
# 🗣️ Text-to-speech model (German voice)
tts_model = TTS(model_name="tts_models/de/thorsten/tacotron2-DCA", progress_bar=False, gpu=torch.cuda.is_available())
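# Coqui TTS downloads the Thorsten German voice model on first use.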
# 🧩 Chat history and response generation
def chat_with_ai(prompt, history=None):
    # Avoid a mutable default argument; a shared list would leak state across calls
    if history is None:
        history = []
    # Rebuild the conversation as a plain-text prompt
    full_prompt = ""
    for user, bot in history:
        full_prompt += f"User: {user}\nAssistant: {bot}\n"
    full_prompt += f"User: {prompt}\nAssistant:"
    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
    # do_sample=True is required for temperature/top_p to take effect
    outputs = model.generate(**inputs, max_new_tokens=256, do_sample=True, temperature=0.7, top_p=0.95)
    reply = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Keep only the newest assistant turn and drop any generated follow-up "User:" turns
    answer = reply.split("Assistant:")[-1].split("User:")[0].strip()
    history.append((prompt, answer))
    return answer, history
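
# Example round trip (replies are illustrative, not real model output):
#   answer, history = chat_with_ai("Hallo, wer bist du?")
#   answer, history = chat_with_ai("Was kannst du?", history)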
# 🎤 Process spoken input (speech to text)
def speech_to_text(audio_path):
    audio = whisper.load_audio(audio_path)
    audio = whisper.pad_or_trim(audio)  # Whisper decodes fixed 30-second windows
    mel = whisper.log_mel_spectrogram(audio).to(stt_model.device)
    # Pass explicit options: German language, and fp16 only when a GPU is present
    options = whisper.DecodingOptions(language="de", fp16=torch.cuda.is_available())
    result = whisper.decode(stt_model, mel, options)
    return result.text
# 🗣️ Convert text to speech
def text_to_speech(text):
    tts_model.tts_to_file(text=text, file_path="tts_output.wav")
    return "tts_output.wav"
# 🧩 Gradio interface
with gr.Blocks(title="Meine KI") as demo:
    gr.Markdown("## 🤖 Meine eigene KI (deutsch, mit Stimme)")
    chatbot = gr.Chatbot()
    text_input = gr.Textbox(label="💬 Nachricht eingeben")
    # source="microphone" is the Gradio 3.x API (4.x uses sources=["microphone"])
    audio_input = gr.Audio(source="microphone", type="filepath", label="🎤 Spracheingabe")
    audio_output = gr.Audio(label="🗣️ KI-Antwort als Audio", type="filepath")
    state = gr.State([])
    def handle_text(message, history):
        reply, updated_history = chat_with_ai(message, history)
        voice = text_to_speech(reply)
        # gr.Chatbot expects the full (user, bot) history, not just the last reply
        return updated_history, updated_history, voice

    def handle_audio(audio, history):
        if audio is None:  # .change also fires when the recording is cleared
            return history, history, None
        transcribed = speech_to_text(audio)
        reply, updated_history = chat_with_ai(transcribed, history)
        voice = text_to_speech(reply)
        return updated_history, updated_history, voice
    text_input.submit(handle_text, [text_input, state], [chatbot, state, audio_output])
    audio_input.change(handle_audio, [audio_input, state], [chatbot, state, audio_output])

demo.launch()
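# Run the script (filename assumed, e.g. `python app.py`) and open the local
# URL Gradio prints, by default http://127.0.0.1:7860.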