Prajwal3009 committed on
Commit
ee703ac
·
verified ·
1 Parent(s): b49b78e

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +169 -0
app.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import websockets
3
+ import pyaudio
4
+ import threading
5
+ import logging
6
+ import json
7
+ import time
8
+ import struct
9
+ import openai
10
+ from fastapi import FastAPI, WebSocket
11
+ from fastapi.responses import HTMLResponse
12
+ from openai import OpenAI
13
+ from dotenv import load_dotenv
14
+ import os
15
+ from fastapi.middleware.cors import CORSMiddleware
16
+ from speech import record_audio
17
+ from fastapi import FastAPI, File, UploadFile,Form
18
+ from fastapi.responses import JSONResponse
19
+
20
+ load_dotenv()
21
+ client = OpenAI()
22
+ OpenAI_API_KEY = os.getenv("OPENAI_API_KEY")
23
+
24
+ # Audio configuration
25
+ FORMAT = pyaudio.paInt16
26
+ CHANNELS = 1
27
+ RATE = 16000
28
+ CHUNK = 1024
29
+
30
+ # Initialize FastAPI
31
+ app = FastAPI()
32
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
33
+ app.add_middleware( CORSMiddleware, allow_origins=["http://localhost:3000"], # Allow requests from this origin
34
+ allow_credentials=True,
35
+ allow_methods=["*"],
36
+ allow_headers=["*"],
37
+ )
38
+ chat_history = []
39
+ # OpenAI API key
40
+ openai.api_key = OpenAI_API_KEY
41
+ @app.get("/api-key")
42
+ def get_api_key():
43
+ return {"API_KEY": os.getenv("OPENAI_API_KEY")}
44
+ @app.post("/upload")
45
+ async def upload_file(file: UploadFile = File(...)):
46
+ try:
47
+ contents = await file.read()
48
+ with open("audio.wav", "wb") as f:
49
+ f.write(contents) # Process the audio file with Whisper model
50
+ text = process_audio_with_whisper("audio.wav") # Generate response with GPT-4.0
51
+ if "generate an image" in text.lower():
52
+ image_url = generate_image_with_dalle(text)
53
+ chat_history.append({"type": "image", "content": image_url})
54
+ return JSONResponse(content={"image_url": image_url})
55
+ else:
56
+ response = generate_response_with_gpt4(text)
57
+ chat_history.append({"type": "text", "content": response})
58
+ return JSONResponse(content={"response": response})
59
+ except Exception as e:
60
+ logging.error(f"Error processing file: {e}")
61
+ return JSONResponse(content={"error": str(e)}, status_code=500)
62
+
63
+ @app.post("/text-input")
64
+ async def text_input(prompt: str = Form(...)):
65
+ try: # Determine if the user is asking for an image
66
+ if "generate an image" in prompt.lower() or "generate a realistic image" in prompt.lower():
67
+ image_url = generate_image_with_dalle(prompt)
68
+ chat_history.append({"type": "image", "content": image_url})
69
+ return JSONResponse(content={"image_url": image_url})
70
+ else: response = generate_response_with_gpt4(prompt)
71
+ chat_history.append({"type": "text", "content": response})
72
+ return JSONResponse(content={"response": response})
73
+ except Exception as e:
74
+ logging.error(f"Error processing text input: {e}")
75
+ return JSONResponse(content={"error": str(e)}, status_code=500)
76
+
77
+ @app.post("/image-url-input")
78
+ async def image_input(url: str = Form(...), prompt: str = Form(...)):
79
+ try:
80
+ image_url = url
81
+ response = process_image_with_gpt4(image_url, prompt)
82
+ chat_history.append({"type": "text", "content": response})
83
+ return JSONResponse(content={"response": response})
84
+ except Exception as e:
85
+ logging.error(f"Error processing image input: {e}")
86
+ return JSONResponse(content={"error": str(e)}, status_code=500)
87
+
88
+ @app.get("/chat-history")
89
+ async def get_chat_history():
90
+ return JSONResponse(content={"chat_history": chat_history})
91
+
92
+ filepath = "audio.wav"
93
+
94
+ def process_audio_with_whisper(filepath): # Save the audio data to a file
95
+ # with open("audio.wav", "wb") as f:
96
+ # f.write(audio_data) # Transcribe the audio file using OpenAI's Whisper model
97
+ try:
98
+ audio_file= open(filepath, "rb")
99
+ transcription = client.audio.transcriptions.create(
100
+ model="whisper-1",
101
+ file=audio_file,
102
+
103
+ )
104
+ print(transcription.text)
105
+ return transcription.text
106
+ except Exception as e:
107
+ logging.error(f"Error transcribing audio: {e}")
108
+ raise
109
+
110
+ def generate_response_with_gpt4(text):
111
+ try:
112
+ completion = client.chat.completions.create(
113
+ model="gpt-4-turbo",
114
+ messages=[
115
+ {"role": "system", "content": "You are a helpful assistant."},
116
+ {
117
+ "role": "user",
118
+ "content": text
119
+ }
120
+ ]
121
+ )
122
+ print(completion.choices[0].message.content)
123
+ return completion.choices[0].message.content
124
+ except Exception as e:
125
+ logging.error(f"Error generating response: {e}")
126
+ raise
127
+
128
+
129
+
130
+ # response.choices[0].text.strip()
131
+
132
+ def generate_image_with_dalle(prompt):
133
+ response = client.images.generate(
134
+ model="dall-e-3",
135
+ prompt=prompt,
136
+ size="1024x1024",
137
+ quality="hd",
138
+ n=1,
139
+ )
140
+ return response.data[0].url
141
+
142
+ def process_image_with_gpt4(url,text):
143
+ try:
144
+
145
+ completion = client.chat.completions.create(
146
+ model="gpt-4o",
147
+ messages=[
148
+ {
149
+ "role": "user",
150
+ "content": [
151
+ {"type": "text", "text": text},
152
+ {
153
+ "type": "image_url",
154
+ "image_url": {
155
+ "url": url,
156
+ }
157
+ },
158
+ ],
159
+ }
160
+ ],
161
+ )
162
+ return completion.choices[0].message.content
163
+ except Exception as e:
164
+ logging.error(f"Error processing image: {e}")
165
+ raise
166
+
167
+ if __name__ == "__main__":
168
+ import uvicorn
169
+ uvicorn.run(app, host="0.0.0.0", port=8000)