"""PyQt5 front-end for a MultiModalTransformer model.

Provides tabs for chat, speech recognition, image captioning, music
generation, and anomaly detection.  Long-running model calls are pushed
onto background Qt threads so the GUI stays responsive.
"""

import sys

import numpy as np
import tensorflow as tf
from PyQt5.QtWidgets import (QApplication, QWidget, QVBoxLayout, QHBoxLayout,
                             QTextEdit, QPushButton, QLineEdit, QLabel,
                             QFileDialog, QTabWidget, QProgressBar)
from PyQt5.QtCore import Qt, QThread, pyqtSignal
from PyQt5.QtGui import QPixmap
import sounddevice as sd
import soundfile as sf
import librosa
from PIL import Image

from multimodal_transformer import MultiModalTransformer, HParams


class WorkerThread(QThread):
    """Run a callable on a background Qt thread and emit its result.

    The result object is delivered via the ``finished`` signal, which Qt
    dispatches on the receiver's (GUI) thread.
    """

    finished = pyqtSignal(object)

    def __init__(self, func, *args, **kwargs):
        super().__init__()
        self.func = func
        self.args = args
        self.kwargs = kwargs

    def run(self):
        result = self.func(*self.args, **self.kwargs)
        self.finished.emit(result)


class EnhancedChatGUI(QWidget):
    """Tabbed GUI exposing the model's conversation and pipeline modes."""

    def __init__(self, model):
        super().__init__()
        self.model = model
        # Keep strong references to running workers: a QThread held only
        # in a local variable can be garbage-collected while the OS
        # thread is still running, crashing the application.
        self._workers = []
        self.initUI()

    def initUI(self):
        """Build the window and all five task tabs."""
        self.setWindowTitle('MultiModal Transformer Interface')
        self.setGeometry(100, 100, 800, 600)

        layout = QVBoxLayout()
        self.tabs = QTabWidget()
        self.tabs.addTab(self.createChatTab(), "Chat")
        self.tabs.addTab(self.createSpeechTab(), "Speech Recognition")
        self.tabs.addTab(self.createImageTab(), "Image Captioning")
        self.tabs.addTab(self.createMusicTab(), "Music Generation")
        self.tabs.addTab(self.createAnomalyTab(), "Anomaly Detection")
        layout.addWidget(self.tabs)
        self.setLayout(layout)

    # ------------------------------------------------------------------
    # Tab construction
    # ------------------------------------------------------------------

    def createChatTab(self):
        """Chat transcript, input row, and personality-trait editor."""
        widget = QWidget()
        layout = QVBoxLayout()

        self.chatDisplay = QTextEdit()
        self.chatDisplay.setReadOnly(True)
        layout.addWidget(self.chatDisplay)

        inputLayout = QHBoxLayout()
        self.inputField = QLineEdit()
        self.inputField.returnPressed.connect(self.sendMessage)
        inputLayout.addWidget(self.inputField)
        sendButton = QPushButton('Send')
        sendButton.clicked.connect(self.sendMessage)
        inputLayout.addWidget(sendButton)
        layout.addLayout(inputLayout)

        traitLayout = QHBoxLayout()
        self.traitLabel = QLabel('Adjust trait:')
        self.traitInput = QLineEdit()
        self.traitValue = QLineEdit()
        self.traitButton = QPushButton('Update')
        self.traitButton.clicked.connect(self.updateTrait)
        traitLayout.addWidget(self.traitLabel)
        traitLayout.addWidget(self.traitInput)
        traitLayout.addWidget(self.traitValue)
        traitLayout.addWidget(self.traitButton)
        layout.addLayout(traitLayout)

        widget.setLayout(layout)
        return widget

    def createSpeechTab(self):
        """Record button plus read-only transcript output."""
        widget = QWidget()
        layout = QVBoxLayout()
        self.recordButton = QPushButton('Record Audio (5 seconds)')
        self.recordButton.clicked.connect(self.recordAudio)
        layout.addWidget(self.recordButton)
        self.speechOutput = QTextEdit()
        self.speechOutput.setReadOnly(True)
        layout.addWidget(self.speechOutput)
        widget.setLayout(layout)
        return widget

    def createImageTab(self):
        """Image picker, preview label, and caption output."""
        widget = QWidget()
        layout = QVBoxLayout()
        self.imageButton = QPushButton('Select Image')
        self.imageButton.clicked.connect(self.selectImage)
        layout.addWidget(self.imageButton)
        self.imageLabel = QLabel()
        layout.addWidget(self.imageLabel)
        self.captionOutput = QTextEdit()
        self.captionOutput.setReadOnly(True)
        layout.addWidget(self.captionOutput)
        widget.setLayout(layout)
        return widget

    def createMusicTab(self):
        """Music-generation trigger and result display."""
        widget = QWidget()
        layout = QVBoxLayout()
        self.generateMusicButton = QPushButton('Generate Music')
        self.generateMusicButton.clicked.connect(self.generateMusic)
        layout.addWidget(self.generateMusicButton)
        self.musicOutput = QTextEdit()
        self.musicOutput.setReadOnly(True)
        layout.addWidget(self.musicOutput)
        widget.setLayout(layout)
        return widget

    def createAnomalyTab(self):
        """Anomaly-detection trigger and result display."""
        widget = QWidget()
        layout = QVBoxLayout()
        self.anomalyButton = QPushButton('Detect Anomalies')
        self.anomalyButton.clicked.connect(self.detectAnomalies)
        layout.addWidget(self.anomalyButton)
        self.anomalyOutput = QTextEdit()
        self.anomalyOutput.setReadOnly(True)
        layout.addWidget(self.anomalyOutput)
        widget.setLayout(layout)
        return widget

    # ------------------------------------------------------------------
    # Background-work plumbing
    # ------------------------------------------------------------------

    def _run_async(self, callback, func, *args):
        """Run ``func(*args)`` on a WorkerThread and pass its result to
        ``callback`` on the GUI thread.

        The worker is kept in ``self._workers`` until it finishes so it
        is never garbage-collected mid-run.
        """
        worker = WorkerThread(func, *args)
        worker.finished.connect(callback)
        # Drop our reference once the result has been delivered.
        worker.finished.connect(lambda _result, w=worker: self._workers.remove(w))
        self._workers.append(worker)
        worker.start()

    # ------------------------------------------------------------------
    # Chat
    # ------------------------------------------------------------------

    def sendMessage(self):
        """Send the typed message to the model and show the reply.

        A non-empty ``safe_word_format`` response short-circuits the
        normal conversation path.
        """
        userInput = self.inputField.text()
        self.inputField.clear()
        self.displayMessage("User: " + userInput)

        safeWordResponse = self.model.safe_word_format(userInput)
        if safeWordResponse:
            self.displayMessage("AI: " + safeWordResponse)
            return

        response = self.model.conversation(userInput)
        self.displayMessage("AI: " + response)

    def displayMessage(self, message):
        """Append one line to the chat transcript."""
        self.chatDisplay.append(message)

    def updateTrait(self):
        """Apply the trait/value pair from the chat tab to the model.

        Both a non-numeric value and a rejection by the model are
        reported in the transcript instead of raising.
        """
        trait = self.traitInput.text()
        try:
            # float() must be inside the try: a non-numeric entry raises
            # ValueError just like an invalid trait does.
            value = float(self.traitValue.text())
            self.model.fine_tune_personality(trait, value)
            self.displayMessage(f"System: Updated {trait} to {value}")
        except ValueError as e:
            self.displayMessage(f"System Error: {str(e)}")

    # ------------------------------------------------------------------
    # Speech
    # ------------------------------------------------------------------

    def recordAudio(self):
        """Capture 5 seconds of mono 16 kHz audio and transcribe it.

        NOTE(review): sd.wait() blocks the GUI thread for the full
        capture window; consider moving recording onto a worker too.
        """
        duration = 5  # seconds
        fs = 16000  # sample rate expected by the recognizer
        recording = sd.rec(int(duration * fs), samplerate=fs, channels=1)
        sd.wait()
        sf.write('temp_recording.wav', recording, fs)
        self.processSpeech('temp_recording.wav')

    def processSpeech(self, file_path):
        """Load audio at 16 kHz and run speech recognition off-thread."""
        audio, _ = librosa.load(file_path, sr=16000)
        audio_tensor = tf.convert_to_tensor(audio, dtype=tf.float32)
        audio_tensor = tf.expand_dims(audio_tensor, axis=0)  # add batch dim
        self._run_async(self.onSpeechRecognitionFinished,
                        self.model.pipe, audio_tensor, 'speech_recognition')

    def onSpeechRecognitionFinished(self, result):
        self.speechOutput.setText(f"Recognized Speech: {result}")

    # ------------------------------------------------------------------
    # Image captioning
    # ------------------------------------------------------------------

    def selectImage(self):
        """Let the user pick an image, preview it, and caption it."""
        file_path, _ = QFileDialog.getOpenFileName(
            self, "Select Image", "", "Image Files (*.png *.jpg *.bmp)")
        if file_path:
            pixmap = QPixmap(file_path)
            self.imageLabel.setPixmap(pixmap.scaled(300, 300, Qt.KeepAspectRatio))
            self.processImage(file_path)

    def processImage(self, file_path):
        """Preprocess the image to (1, 224, 224, 3) in [0, 1] and caption it."""
        # convert('RGB') guarantees 3 channels even for grayscale/RGBA files.
        image = Image.open(file_path).convert('RGB')
        image = image.resize((224, 224))
        image_array = np.array(image) / 255.0
        image_tensor = tf.convert_to_tensor(image_array, dtype=tf.float32)
        image_tensor = tf.expand_dims(image_tensor, axis=0)
        # Second element is the caption-decoder seed token(s).
        self._run_async(self.onImageCaptioningFinished,
                        self.model.pipe,
                        [image_tensor, tf.zeros((1, 1), dtype=tf.int32)],
                        'image_captioning')

    def onImageCaptioningFinished(self, result):
        self.captionOutput.setText(f"Generated Caption: {result}")

    # ------------------------------------------------------------------
    # Music generation
    # ------------------------------------------------------------------

    def generateMusic(self):
        """Feed random pitch/duration/velocity sequences to the model."""
        # Random seed input; a more meaningful prompt could replace this.
        pitch = tf.random.uniform((1, 100), maxval=128, dtype=tf.int32)
        duration = tf.random.uniform((1, 100), maxval=32, dtype=tf.int32)
        velocity = tf.random.uniform((1, 100), maxval=128, dtype=tf.int32)
        self._run_async(self.onMusicGenerationFinished,
                        self.model.pipe, [pitch, duration, velocity],
                        'music_generation')

    def onMusicGenerationFinished(self, result):
        self.musicOutput.setText(f"Generated Music: {result}")

    # ------------------------------------------------------------------
    # Anomaly detection
    # ------------------------------------------------------------------

    def detectAnomalies(self):
        """Run anomaly detection on a random (1, 100, 768) input."""
        anomaly_input = tf.random.normal((1, 100, 768))
        self._run_async(self.onAnomalyDetectionFinished,
                        self.model.pipe, anomaly_input, 'anomaly_detection')

    def onAnomalyDetectionFinished(self, result):
        # Pipeline returns (reconstruction, anomalies); only the
        # anomalies are shown.
        reconstructed, anomalies = result
        self.anomalyOutput.setText(f"Detected Anomalies: {anomalies}")


def main():
    """Build the model, launch the Qt event loop, and show the GUI."""
    hparams = HParams(
        n_vocab=50000,
        n_ctx=1024,
        n_embd=768,
        n_head=12,
        n_layer=12
    )
    knowledge_base = [
        {'text': 'Example knowledge 1', 'vector': np.random.rand(768)},
        {'text': 'Example knowledge 2', 'vector': np.random.rand(768)},
    ]
    model = MultiModalTransformer(hparams, knowledge_base)

    app = QApplication(sys.argv)
    gui = EnhancedChatGUI(model)
    gui.show()
    sys.exit(app.exec_())


if __name__ == '__main__':
    main()