amitkumarjaiswal committed on
Commit
b5bc73d
·
1 Parent(s): 728d462

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +142 -0
app.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import databutton as db
3
+
4
+ from langchain.llms import OpenAI
5
+ from langchain.chains import RetrievalQA
6
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ from langchain.document_loaders import YoutubeLoader
8
+ from langchain.docstore.document import Document
9
+ from langchain.vectorstores import FAISS
10
+ from langchain.embeddings import OpenAIEmbeddings
11
+
12
+
13
def text_custom(font_size, text):
    """Wrap *text* in a ``<p>`` tag carrying a ``<font_size>-font`` CSS class.

    Args:
        font_size: Prefix of the CSS class to apply; expected to be one of
            ``'b'``, ``'m'`` or ``'s'``.
        text: Content placed inside the paragraph. It is interpolated as-is
            (no HTML escaping), so only pass trusted text.

    Returns:
        The HTML paragraph as a string, e.g. ``'<p class="m-font">hi</p>'``.
    """
    return f'<p class="{font_size}-font">{text}</p>'
19
+
20
+
21
def _answer_question(api_key, video_url, language, question,
                     embedding_model, llm_model, chain_type, temperature):
    """Answer *question* from the transcript of the YouTube video at *video_url*.

    Loads the transcript, splits it into overlapping chunks, indexes the
    chunks in an in-memory FAISS store and runs a RetrievalQA chain over it.

    Returns:
        The answer text, or ``None`` when no transcript text could be loaded.
    """
    # 1. Get text data from the external source (YouTube video transcript).
    # Reference: https://python.langchain.com/en/latest/modules/indexes/document_loaders.html
    documents = YoutubeLoader.from_youtube_url(video_url, language=language).load()
    if not documents:
        return None

    # 2. Text preprocessing (chunking).
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,
        separators=['\n\n', '\n', '.', '!', '?', ',', ' ', ''],
        chunk_overlap=200,
    )
    chunks = text_splitter.split_text(documents[0].page_content)
    chunk_docs = [Document(page_content=chunk) for chunk in chunks]
    if not chunk_docs:
        # An empty transcript yields nothing to index.
        return None

    # 3. Define the embedding model and provider.
    embeddings = OpenAIEmbeddings(openai_api_key=api_key, model=embedding_model)

    # 4. Create the embedding vector store (vector DB) to use as the index.
    # Named `vector_store` so it does not shadow the module-level `db` import.
    vector_store = FAISS.from_documents(chunk_docs, embeddings)

    # 5. Build the chain for the question-answering task with a retriever.
    qa = RetrievalQA.from_chain_type(
        llm=OpenAI(
            openai_api_key=api_key,
            model=llm_model,
            temperature=temperature,
            verbose=True,
        ),
        chain_type=chain_type,
        retriever=vector_store.as_retriever(),
        return_source_documents=True,
        verbose=True,
    )
    return qa({"query": question})['result']


def main():
    """Render the YouTube QnA Streamlit page and answer the user's question.

    Collects the OpenAI key, video URL, question and transcript language from
    the page, model settings from the sidebar, and on button press shows the
    answer produced from the video's transcript.
    """
    st.set_page_config(
        page_title="Welcome to Question Answering on YouTube video page",
        layout="wide",  # {wide, centered}
    )
    # Font-size CSS classes used by text_custom(), plus input-label styling.
    # References:
    #   https://discuss.streamlit.io/t/change-input-text-font-size/29959/4
    #   https://discuss.streamlit.io/t/change-font-size-in-st-write/7606/2
    st.markdown("""<style>.b-font {font-size:25px !important;}</style>""", unsafe_allow_html=True)
    st.markdown("""<style>.m-font {font-size:20px !important;}</style>""", unsafe_allow_html=True)
    st.markdown("""<style>.s-font {font-size:15px !important;}</style>""", unsafe_allow_html=True)
    tabs_font_css = """<style>div[class*="stTextInput"] label {font-size: 15px;color: black;}</style>"""
    st.write(tabs_font_css, unsafe_allow_html=True)

    st.title("YouTube QnA")
    t = "Watch all Youtube videos... Sometimes it's hard, right? Just throw us a URL and ask. I'll answer anything. 😋"
    st.markdown(text_custom('m', t), unsafe_allow_html=True)
    st.info('Note: This youtube video itself should have transcript', icon="ℹ️")

    api_key = st.text_input(
        "Enter Open AI Key.",
        # Neutral hint only: never ship a credential-shaped string in the UI.
        placeholder="sk-...",
        type="password",
    )

    user_in_url = st.text_input(
        "Please enter Youtube URL.",
        value="https://www.youtube.com/watch?v=o8NPllzkFhE",
    )

    if user_in_url:
        # Centre the player in a column taking `width` percent of the page.
        width = 40
        side = max((100 - width) / 2, 0.01)
        _, container, _ = st.columns([side, width, side])
        container.video(data=user_in_url)

    user_question = st.text_input(
        "Please enter your questions in the video.",
        placeholder="What is the Linux and Why it is created?",
    )

    user_in_lang = st.text_input(
        "Tell us what language the Youtube video is in (For example.. enter 'en' for English or 'hi' for Hindi).",
        value="en",
    )

    with st.sidebar:
        embedding_model = st.selectbox(
            label='Embedding Model',
            options=['text-embedding-ada-002'],
        )

        llm_model = st.selectbox(
            label='LLM Model',
            options=["text-davinci-003",
                     "text-curie-001",
                     "text-babbage-001",
                     "text-ada-001"],
        )

        chain = st.radio(
            label='Chain type',
            options=['stuff',
                     'map_reduce',
                     'refine'],
        )

        temperature = st.slider(
            "Temperature",
            0.0, 1.0, 0.7,
        )

    if not st.button("Hi GPT! Answer the question right now."):
        return

    if not api_key:
        st.warning("Enter your OPENAI API-KEY. If you don't have one Get your OpenAI API key from [here](https://platform.openai.com/account/api-keys).")
        # Stop here: continuing would call OpenAI with an empty key and crash.
        return

    if not user_in_url or not user_question:
        st.warning("Please enter both a YouTube URL and a question.")
        return

    # The spinner covers the whole pipeline, since transcript download and
    # embedding are slow too, not just the final LLM call.
    with st.spinner("Running to answer your question .."):
        answer = _answer_question(
            api_key,
            user_in_url,
            user_in_lang,
            user_question,
            embedding_model,
            llm_model,
            chain,
            temperature,
        )

    if answer is None:
        st.error("No transcript text could be loaded for this video.")
        return
    st.success(answer)
140
+
141
# Launch the app only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()