Spaces:

focusit
/

ksv

Runtime error

App Files Files Community

focusit committed on Sep 13, 2023

Commit

e8df325

1 Parent(s): ec011d3

Create app.py

Browse files

Files changed (1) hide show

app.py +166 -0

app.py ADDED Viewed

	@@ -0,0 +1,166 @@

+import openai
+import tempfile
+import numpy as np
+import pandas as pd
+from pytube import YouTube, Search
+import os
+import pinecone
+import streamlit as st
+from streamlit_chat import message
+import pinecone_utils
+openai.api_key = os.getenv("openai_key")
+video_dict = {
+    "url": [],
+    "title": [],
+    "content": []
+}
+def video_to_audio(video_URL):
+    # Get the video
+    video = YouTube(video_URL)
+    video_dict["url"].append(video_URL)
+    try:
+        video_dict["title"].append(video.title)
+    except:
+        video_dict["title"].append("Title not found")
+    # Convert video to Audio
+    audio = video.streams.filter(only_audio=True).first()
+    temp_dir = tempfile.mkdtemp()
+    variable = np.random.randint(1111, 1111111)
+    file_name = f'recording{variable}.mp3'
+    temp_path = os.path.join(temp_dir, file_name)
+    # audio_in = AudioSegment.from_file(uploaded_file.name, format="m4a")
+    # with open(temp_path, "wb") as f:
+    #     f.write(uploaded_file.getvalue())
+    # Save to destination
+    output = audio.download(output_path=temp_path)
+    audio_file = open(output, "rb")
+    textt = openai.Audio.translate("whisper-1", audio_file)["text"]
+    return textt
+def create_dataframe(data):
+    df = pd.DataFrame(data)
+    df.to_csv("history.csv")
+s = Search("Youtube video title")
+print(len(s.results))
+for ele in s.results[0:5:1]:
+    transcription = video_to_audio(ele.watch_url)
+    print(transcription)
+    print("\n\n\n")
+    video_dict["content"].append(transcription)
+create_dataframe(video_dict)
+print("Created Dataframe")
+pinecone.init(api_key=os.getenv("pinecone_key"), environment="us-east-1-aws")
+pinecone.create_index(
+    "demo-youtube-app",
+    dimension=1536,
+    metric="cosine",
+    pod_type="p1"
+)
+index = pinecone.Index("demo-youtube-app")
+print(index.describe_index_stats())
+def get_embedding(text):
+    response = openai.Embedding.create(
+        input=text,
+        model="text-embedding-ada-002"
+    )
+    return response['data'][0]['embedding']
+def addData(index,url, title,context):
+    my_id = index.describe_index_stats()['total_vector_count']
+    chunkInfo = (str(my_id),
+                 get_embedding(context),
+                 {'video_url': url, 'title':title,'context':context})
+    index.upsert(vectors=[chunkInfo])
+def find_top_match(query, k):
+    query_em = pinecone_utils.get_embedding(query)
+    result = index.query(query_em, top_k=k, includeMetadata=True)
+    return [result['matches'][i]['metadata']['video_url'] for i in range(k)], [result['matches'][i]['metadata']['title']
+                                                                               for i in range(k)], [
+               result['matches'][i]['metadata']['context']
+               for i in range(k)]
+def get_message_history(contexts):
+    message_hist = [
+        {"role": "system",
+         "content": """As a Bot, it's important to show empathy and understanding when answering questions.You are a smart AI who have to answer the question only from the provided context If you
+     are unable to understand the question and need more clarity then your response should be 'Could you please be
+     more specific?'. If you are unable to find the answer from the given context then your response should be 'Answer is not present in the provided video' \n"""},
+        {"role": "system", "content": contexts},
+    ]
+    return message_hist
+def chat(user_query, message, role="user"):
+    message_history.append({"role": role, "content": f"{var}"})
+    completion = openai.ChatCompletion.create(
+        model="gpt-3.5-turbo",
+        messages=message
+    )
+    reply = completion.choices[0].message.content
+    message_history.append({"role": "assistant", "content": f"{reply}"})
+    return reply
+# container for chat history
+response_container = st.container()
+# container for text box
+textcontainer = st.container()
+with textcontainer:
+    user_input = get_text()
+    if st.session_state.past or user_input:
+        urls, title, context = find_top_match(user_input, 1)
+        message_history = get_message_history(context[0])
+        with st.spinner("Generating the answer..."):
+            response = chat(user_input, message_history)
+        st.session_state.past.append(user_input)
+        st.session_state.generated.append(response)
+        st.subheader("References")
+        link_expander = st.expander("Context obtained from url")
+        link_expander.write(urls)