focusit committed on
Commit
e8df325
·
1 Parent(s): ec011d3

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +166 -0
app.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import openai
3
+ import tempfile
4
+ import numpy as np
5
+ import pandas as pd
6
+ from pytube import YouTube, Search
7
+ import os
8
+ import pinecone
9
+ import streamlit as st
10
+ from streamlit_chat import message
11
+ import pinecone_utils
12
+
13
+
14
+
15
+
16
# Read the OpenAI API key from the environment (never hard-code secrets).
openai.api_key = os.getenv("openai_key")

# Accumulator for scraped videos; the three lists are kept in lockstep
# (one url/title/content entry per video) and later written to a DataFrame.
video_dict = {key: [] for key in ("url", "title", "content")}
23
+
24
+
25
def video_to_audio(video_URL):
    """Download a YouTube video's audio track and transcribe it with Whisper.

    Side effects: appends the URL and (best-effort) title to the module-level
    ``video_dict`` accumulator.

    Args:
        video_URL: Full YouTube watch URL.

    Returns:
        Transcription text from OpenAI's Whisper *translate* endpoint
        (non-English speech comes back as English).
    """
    video = YouTube(video_URL)
    video_dict["url"].append(video_URL)
    try:
        video_dict["title"].append(video.title)
    except Exception:  # was a bare except; pytube raises on restricted videos
        video_dict["title"].append("Title not found")

    # Highest-priority audio-only stream.
    audio = video.streams.filter(only_audio=True).first()

    # Download into a fresh temp dir under a collision-resistant name.
    temp_dir = tempfile.mkdtemp()
    file_name = f"recording{np.random.randint(1111, 1111111)}.mp3"
    # BUG FIX: the original passed the full file path as ``output_path``;
    # pytube treats ``output_path`` as a *directory*, so the file landed in a
    # directory named recording<N>.mp3 under the video's own title. Pass the
    # directory and filename separately instead.
    output = audio.download(output_path=temp_dir, filename=file_name)

    # BUG FIX: the original opened the file and never closed it (leaked
    # handle); a context manager closes it deterministically.
    with open(output, "rb") as audio_file:
        return openai.Audio.translate("whisper-1", audio_file)["text"]
53
+
54
+
55
+
56
+
57
def create_dataframe(data):
    """Persist the collected video records to ``history.csv``.

    Args:
        data: Mapping of column name -> list of values (equal lengths),
            e.g. the module-level ``video_dict``.
    """
    frame = pd.DataFrame(data)
    frame.to_csv("history.csv")
60
+
61
# Seed the corpus: search YouTube and transcribe the top results.
s = Search("Youtube video title")
print(len(s.results))

# Transcribe only the first five hits to bound runtime and API cost.
for ele in s.results[:5]:
    transcription = video_to_audio(ele.watch_url)
    print(transcription)
    print("\n\n\n")
    video_dict["content"].append(transcription)

create_dataframe(video_dict)

print("Created Dataframe")


pinecone.init(api_key=os.getenv("pinecone_key"), environment="us-east-1-aws")

# BUG FIX: create_index raises if the index already exists, so the original
# script could only run once; guard to make the bootstrap re-runnable.
if "demo-youtube-app" not in pinecone.list_indexes():
    pinecone.create_index(
        "demo-youtube-app",
        dimension=1536,  # text-embedding-ada-002 output size
        metric="cosine",
        pod_type="p1",
    )

index = pinecone.Index("demo-youtube-app")
print(index.describe_index_stats())
86
+
87
def get_embedding(text):
    """Return the ada-002 embedding vector (1536 floats) for *text*."""
    result = openai.Embedding.create(input=text, model="text-embedding-ada-002")
    return result["data"][0]["embedding"]
94
+
95
+
96
def addData(index, url, title, context):
    """Embed one transcript chunk and upsert it into the Pinecone index.

    The vector id is the index's current total vector count, so ids are
    assigned sequentially as chunks are added.

    Args:
        index: A pinecone.Index handle.
        url: Source video URL (stored as metadata).
        title: Video title (stored as metadata).
        context: Transcript text to embed and store.
    """
    next_id = index.describe_index_stats()["total_vector_count"]
    metadata = {"video_url": url, "title": title, "context": context}
    index.upsert(vectors=[(str(next_id), get_embedding(context), metadata)])
104
+
105
+
106
def find_top_match(query, k):
    """Query the index and return the top-*k* matches as three parallel lists.

    Args:
        query: The user's question text.
        k: Number of matches to return (assumes the index holds at least k).

    Returns:
        Tuple ``(video_urls, titles, contexts)``, each a list of length *k*.
    """
    query_vector = pinecone_utils.get_embedding(query)
    result = index.query(query_vector, top_k=k, includeMetadata=True)

    urls, titles, contexts = [], [], []
    for i in range(k):
        meta = result["matches"][i]["metadata"]
        urls.append(meta["video_url"])
        titles.append(meta["title"])
        contexts.append(meta["context"])
    return urls, titles, contexts
114
+
115
+
116
+
117
def get_message_history(contexts):
    """Build the initial chat history: system prompt plus retrieved context.

    Args:
        contexts: Transcript text retrieved for the user's question.

    Returns:
        A list of two ``{"role", "content"}`` dicts ready for the Chat API.
    """
    system_prompt = """As a Bot, it's important to show empathy and understanding when answering questions.You are a smart AI who have to answer the question only from the provided context If you
are unable to understand the question and need more clarity then your response should be 'Could you please be
more specific?'. If you are unable to find the answer from the given context then your response should be 'Answer is not present in the provided video' \n"""
    return [
        {"role": "system", "content": system_prompt},
        {"role": "system", "content": contexts},
    ]
128
+
129
def chat(user_query, message, role="user"):
    """Append the user's question to the history and get a model reply.

    Args:
        user_query: The user's question text.
        message: Mutable chat history (list of role/content dicts); the
            question and the assistant's reply are appended in place.
        role: Role under which to record ``user_query``.

    Returns:
        The assistant's reply text.
    """
    # BUG FIX: the original formatted an undefined name ``var`` (NameError at
    # runtime) and appended to the global ``message_history`` instead of the
    # ``message`` parameter (they are the same object at the call site, so
    # appending to the parameter preserves behavior and drops the global).
    message.append({"role": role, "content": f"{user_query}"})
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=message
    )
    reply = completion.choices[0].message.content
    message.append({"role": "assistant", "content": f"{reply}"})
    return reply
138
+
139
+
140
+
141
+
142
# --- Streamlit chat UI -----------------------------------------------------
# container for chat history
response_container = st.container()
# container for text box
textcontainer = st.container()

with textcontainer:
    # NOTE(review): get_text() is not defined anywhere in this file — it is
    # presumably a small helper wrapping st.text_input(); confirm where it
    # comes from before running.
    user_input = get_text()

# NOTE(review): st.session_state.past / .generated are read and appended to
# below but never initialized in this file; the first run will fail unless
# they are seeded elsewhere — verify.
if st.session_state.past or user_input:
    # Retrieve the single best-matching transcript chunk for the question.
    urls, title, context = find_top_match(user_input, 1)
    # Seed the chat history with the system prompt + retrieved context.
    message_history = get_message_history(context[0])

    with st.spinner("Generating the answer..."):
        response = chat(user_input, message_history)

    # Record the turn for the chat-history display.
    st.session_state.past.append(user_input)
    st.session_state.generated.append(response)

    st.subheader("References")

    # Show which video URL(s) the answer context came from.
    link_expander = st.expander("Context obtained from url")
    link_expander.write(urls)
164
+
165
+
166
+