Prajwal3009 commited on
Commit
aad25fe
·
verified ·
1 Parent(s): 4227b5b

Upload 12 files

Browse files
backend/app/.env ADDED
@@ -0,0 +1 @@
 
 
1
+ OPENAI_API_KEY = REDACTED  # SECURITY: this key was committed to a public repo — revoke and rotate it immediately; keep .env out of version control and supply the key via runtime environment variables.
backend/app/1.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10c95ea3eb9a7bb4cb8bddf6feb023250381008177ac162ce169694d05c317de
3
+ size 4126810
backend/app/__pycache__/speech.cpython-312.pyc ADDED
Binary file (4.68 kB). View file
 
backend/app/audio.wav ADDED
Binary file (255 kB). View file
 
backend/app/client.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""WebSocket microphone client: streams mic audio to the server and logs replies."""
import asyncio
import websockets
import pyaudio
import threading
import logging
import json
import time
import struct

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Audio capture configuration: 16 kHz mono 16-bit PCM, 1024-frame buffers.
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
CHUNK = 1024
AUDIO_SERVER_URL = 'ws://localhost:8000/ws'  # Your websocket URL
# Example: AUDIO_SERVER_URL = "ws://localhost:8080/ws/your_user_id"
20
async def audio_sender(queue, websocket):
    """Forward audio chunks from *queue* to *websocket*.

    Stops cleanly when the ``None`` sentinel (pushed by the recorder thread
    on shutdown) arrives, instead of attempting to send ``None`` over the
    socket, which would raise inside the websocket library.
    """
    while True:
        audio_data = await queue.get()
        if audio_data is None:  # recorder signalled end-of-stream
            break
        await websocket.send(audio_data)
25
def record_audio_to_queue(queue, loop):
    """Capture microphone audio on a worker thread and push chunks onto *queue*.

    Runs until the stream errors out; always pushes a ``None`` sentinel on
    exit so consumers running on *loop* know the stream has ended.
    """
    pa = pyaudio.PyAudio()
    stream = pa.open(
        format=FORMAT,
        channels=CHANNELS,
        rate=RATE,
        input=True,
        frames_per_buffer=CHUNK,
    )
    try:
        while True:
            chunk = stream.read(CHUNK)
            # Hand the chunk to the asyncio loop from this non-async thread.
            asyncio.run_coroutine_threadsafe(queue.put(chunk), loop)
    except Exception as exc:
        print(f"Error recording audio: {exc}")
    finally:
        stream.stop_stream()
        stream.close()
        pa.terminate()
        # End-of-stream sentinel for consumers.
        asyncio.run_coroutine_threadsafe(queue.put(None), loop)
46
+
47
async def receive_messages(websocket):
    """Log every message the server pushes until the connection closes."""
    try:
        while True:
            logging.info(await websocket.recv())
    except websockets.ConnectionClosed:
        print("Connection closed")
    except Exception as e:
        print(f"Error receiving message: {e}")
59
+
60
async def main():
    """Connect to the audio server and run the sender and receiver concurrently.

    A background thread records microphone audio into an asyncio queue while
    this coroutine forwards chunks to the server and logs incoming messages.
    """
    async with websockets.connect(AUDIO_SERVER_URL) as websocket:
        queue = asyncio.Queue()

        # get_running_loop() is the supported call from inside a coroutine;
        # get_event_loop() here is deprecated since Python 3.10.
        loop = asyncio.get_running_loop()
        audio_thread = threading.Thread(target=record_audio_to_queue, args=(queue, loop))
        audio_thread.start()

        await asyncio.gather(
            audio_sender(queue, websocket),
            receive_messages(websocket),
        )

        audio_thread.join()

if __name__ == "__main__":
    asyncio.run(main())
backend/app/server.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""FastAPI backend: audio transcription (Whisper), chat (GPT-4), image generation (DALL-E)."""
# Imports deduplicated (FastAPI was imported twice) and grouped
# stdlib / third-party / local per convention.
import asyncio
import json
import logging
import os
import struct
import threading
import time

import openai
import pyaudio
import websockets
from dotenv import load_dotenv
from fastapi import FastAPI, File, Form, UploadFile, WebSocket
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse, JSONResponse
from openai import OpenAI

from speech import record_audio

load_dotenv()
client = OpenAI()  # reads OPENAI_API_KEY from the environment
OpenAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Audio configuration (must match the streaming client).
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
CHUNK = 1024

# Initialize FastAPI
app = FastAPI()
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:3000"],  # Allow requests from this origin
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# In-memory conversation log served by /chat-history (lost on restart).
chat_history = []

# Legacy module-level key for the classic `openai` API surface.
openai.api_key = OpenAI_API_KEY
41
@app.get("/api-key")
def get_api_key():
    """Return the OpenAI API key for the frontend.

    SECURITY NOTE(review): this endpoint hands the secret key to any client
    that can reach the server; it should be removed or put behind
    authentication — confirm with the frontend before changing behavior.
    """
    return {"API_KEY": os.getenv("OPENAI_API_KEY")}
44
@app.post("/upload")
async def upload_file(file: UploadFile = File(...)):
    """Accept an uploaded audio file, transcribe it, and answer via GPT-4 or DALL-E.

    The clip is written to the fixed path ``audio.wav`` before transcription.
    """
    try:
        payload = await file.read()
        with open("audio.wav", "wb") as out:
            out.write(payload)

        # Transcribe the saved clip, then route on what was asked for.
        text = process_audio_with_whisper("audio.wav")
        if "generate an image" in text.lower():
            image_url = generate_image_with_dalle(text)
            chat_history.append({"type": "image", "content": image_url})
            return JSONResponse(content={"image_url": image_url})

        response = generate_response_with_gpt4(text)
        chat_history.append({"type": "text", "content": response})
        return JSONResponse(content={"response": response})
    except Exception as e:
        logging.error(f"Error processing file: {e}")
        return JSONResponse(content={"error": str(e)}, status_code=500)
62
+
63
@app.post("/text-input")
async def text_input(prompt: str = Form(...)):
    """Answer a typed prompt with GPT-4 text, or DALL-E when an image is requested."""
    try:
        # Route to image generation when the prompt explicitly asks for one.
        wants_image = ("generate an image" in prompt.lower()
                       or "generate a realistic image" in prompt.lower())
        if wants_image:
            image_url = generate_image_with_dalle(prompt)
            chat_history.append({"type": "image", "content": image_url})
            return JSONResponse(content={"image_url": image_url})

        response = generate_response_with_gpt4(prompt)
        chat_history.append({"type": "text", "content": response})
        return JSONResponse(content={"response": response})
    except Exception as e:
        logging.error(f"Error processing text input: {e}")
        return JSONResponse(content={"error": str(e)}, status_code=500)
76
+
77
@app.post("/image-url-input")
async def image_input(url: str = Form(...), prompt: str = Form(...)):
    """Run GPT-4 vision over the image at *url* with the question in *prompt*."""
    try:
        answer = process_image_with_gpt4(url, prompt)
        chat_history.append({"type": "text", "content": answer})
        return JSONResponse(content={"response": answer})
    except Exception as e:
        logging.error(f"Error processing image input: {e}")
        return JSONResponse(content={"error": str(e)}, status_code=500)
87
+
88
@app.get("/chat-history")
async def get_chat_history():
    """Return the in-memory chat history accumulated since server start."""
    return JSONResponse(content={"chat_history": chat_history})
91
+
92
# Default location where /upload stores the incoming clip.
filepath = "audio.wav"

def process_audio_with_whisper(filepath):
    """Transcribe the audio file at *filepath* with OpenAI Whisper and return the text.

    Errors are logged, then re-raised for the caller's handler.
    """
    try:
        # `with` guarantees the handle is closed — the original opened the
        # file and never closed it, leaking a descriptor per request.
        with open(filepath, "rb") as audio_file:
            transcription = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
            )
        print(transcription.text)
        return transcription.text
    except Exception as e:
        logging.error(f"Error transcribing audio: {e}")
        raise
109
+
110
def generate_response_with_gpt4(text):
    """Return a GPT-4 Turbo chat completion for *text* (logs and re-raises errors)."""
    try:
        prompt_messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": text},
        ]
        completion = client.chat.completions.create(
            model="gpt-4-turbo",
            messages=prompt_messages,
        )
        answer = completion.choices[0].message.content
        print(answer)
        return answer
    except Exception as e:
        logging.error(f"Error generating response: {e}")
        raise
132
def generate_image_with_dalle(prompt):
    """Generate one 1024x1024 HD image for *prompt* with DALL-E 3 and return its URL."""
    try:
        response = client.images.generate(
            model="dall-e-3",
            prompt=prompt,
            size="1024x1024",
            quality="hd",
            n=1,
        )
        return response.data[0].url
    except Exception as e:
        # Added to match the log-and-raise convention of every other helper
        # in this module; previously API errors surfaced unlogged.
        logging.error(f"Error generating image: {e}")
        raise
141
+
142
def process_image_with_gpt4(url, text):
    """Ask GPT-4o about the image at *url* using the question in *text*."""
    try:
        user_content = [
            {"type": "text", "text": text},
            {"type": "image_url", "image_url": {"url": url}},
        ]
        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": user_content}],
        )
        return completion.choices[0].message.content
    except Exception as e:
        logging.error(f"Error processing image: {e}")
        raise
166
+
167
if __name__ == "__main__":
    # Local development entry point; the Docker image starts uvicorn itself.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
backend/app/speech.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Microphone recorder that classifies each chunk with a TFLite YAMNet model."""
import pyaudio
import numpy as np
import tensorflow as tf
import zipfile
import wave
import time

# Audio stream configuration
FORMAT = pyaudio.paInt16   # 16-bit PCM
CHANNELS = 1               # Mono channel
RATE = 16000               # 16 kHz sample rate
CHUNK = 1024               # Buffer size
TARGET_LENGTH = 15600      # model input length in samples — presumably YAMNet's window; confirm
SILENCE_THRESHOLD = 5000   # 5 seconds of silence, expressed in milliseconds

# Rolling waveform window fed to the classifier on every chunk.
audio_buffer = np.zeros(TARGET_LENGTH, dtype=np.float32)

model_path = '1.tflite'
interpreter = tf.lite.Interpreter(model_path=model_path)
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

waveform_input_index = input_details[0]['index']
scores_output_index = output_details[0]['index']

# The class-name list ships inside the .tflite archive itself.
with zipfile.ZipFile(model_path) as z:
    with z.open('yamnet_label_list.txt') as f:
        labels = [line.decode('utf-8').strip() for line in f]

# Ensure the input tensor is correctly sized for one full window.
interpreter.resize_tensor_input(waveform_input_index, [TARGET_LENGTH], strict=False)
interpreter.allocate_tensors()

# Initialize PyAudio
p = pyaudio.PyAudio()
36
+
37
def record_audio():
    """Record microphone audio to audio.wav, classifying each chunk, until
    sustained silence or Ctrl+C; returns the written file path ("audio.wav").

    Bug fix: assigning ``audio_buffer = np.roll(audio_buffer, ...)`` made
    ``audio_buffer`` a function-local name, so the first read raised
    UnboundLocalError; ``global`` restores the intended module-level
    rolling window.
    """
    global audio_buffer
    try:
        # Open the audio stream
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)

        print("Recording... Press Ctrl+C to stop.")

        # Open a .wav file to save the audio
        wf = wave.open("audio.wav", 'wb')
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(p.get_sample_size(FORMAT))
        wf.setframerate(RATE)

        last_speech_time = time.time()

        # Continuously read from the stream, persist, and classify.
        while True:
            audio_data = stream.read(CHUNK)
            # Normalize 16-bit PCM to [-1, 1) floats for the model.
            audio_chunk = np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / 32768.0
            # Slide the fixed-size window left and append the new chunk.
            audio_buffer = np.roll(audio_buffer, -len(audio_chunk))
            audio_buffer[-len(audio_chunk):] = audio_chunk

            # Write audio data to the .wav file
            wf.writeframes(audio_data)

            # Run the classifier over the current window.
            interpreter.set_tensor(waveform_input_index, audio_buffer)
            interpreter.invoke()
            scores = interpreter.get_tensor(scores_output_index)

            # Print the top classification result.
            prediction = labels[scores.argmax()]
            print(prediction)

            # Silence tracking: reset the timer on audible input, stop once
            # the silence exceeds SILENCE_THRESHOLD (ms → seconds).
            if np.max(np.abs(audio_chunk)) > 0.01:
                last_speech_time = time.time()
            elif time.time() - last_speech_time > SILENCE_THRESHOLD / 1000:
                print("Silence detected. Stopping recording.")
                break

    except KeyboardInterrupt:
        # Handle the KeyboardInterrupt to stop recording
        print("\nRecording stopped by user.")

    finally:
        # Stop and close the stream and terminate PyAudio
        stream.stop_stream()
        stream.close()
        p.terminate()
        wf.close()
        print("Stream closed and resources released.")

    return "audio.wav"

if __name__ == "__main__":
    record_audio()
backend/app/trans.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Standalone helper: transcribe the local audio.wav with OpenAI Whisper."""
from openai import OpenAI
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Access environment variables
api_key = os.getenv('OPENAI_API_KEY')

# OpenAI() picks up OPENAI_API_KEY from the environment on its own.
client = OpenAI()
19
def process_audio_with_whisper():
    """Transcribe the local file audio.wav with Whisper-1 and return the text."""
    with open("audio.wav", "rb") as audio_file:
        transcription = client.audio.transcriptions.create(
            model="whisper-1", file=audio_file
        )
    print(transcription.text)
    return transcription.text

if __name__ == "__main__":
    process_audio_with_whisper()
backend/app/uploaded_image.png ADDED
backend/app/yamnet_label_list.txt ADDED
File without changes
backend/docker.dockerfile ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.12.4

# Run as an unprivileged user inside the container.
RUN useradd -m -u 1000 user
USER user
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Install dependencies first so Docker can cache this layer across builds.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

COPY --chown=user . /app
# SECURITY NOTE(review): baking .env (which holds the OpenAI key) into the
# image leaks the secret to anyone who can pull it; prefer runtime env vars.
COPY --chown=user .env .env

# NOTE(review): "app:app" assumes a top-level app.py exposing `app`; this
# repo's server lives at app/server.py, so the target is likely
# "app.server:app" — verify against the actual build context.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
backend/requirements.txt ADDED
Binary file (218 Bytes). View file