Daksh0505 committed on
Commit
63a076a
·
verified ·
1 Parent(s): 5e7cee3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -22
app.py CHANGED
@@ -93,8 +93,11 @@ def create_vector_store(transcript):
93
  )
94
  return FAISS.from_documents(docs, embeddings)
95
 
96
- # Build Model
 
 
97
  def build_model(model_choice, temperature=0.7):
 
98
  if model_choice == "Flan-T5 (Free)":
99
  llm = HuggingFaceEndpoint(
100
  repo_id="google/flan-t5-base",
@@ -102,7 +105,8 @@ def build_model(model_choice, temperature=0.7):
102
  max_new_tokens=500,
103
  temperature=temperature
104
  )
105
- return ChatHuggingFace(llm=llm)
 
106
  elif model_choice == "DeepSeek":
107
  llm = HuggingFaceEndpoint(
108
  repo_id="deepseek-ai/DeepSeek-V3.2-Exp",
@@ -110,7 +114,8 @@ def build_model(model_choice, temperature=0.7):
110
  task="text-generation",
111
  max_new_tokens=500
112
  )
113
- return ChatHuggingFace(llm=llm, temperature=temperature)
 
114
  elif model_choice == "OpenAI":
115
  llm = HuggingFaceEndpoint(
116
  repo_id="openai/gpt-oss-20b",
@@ -118,9 +123,12 @@ def build_model(model_choice, temperature=0.7):
118
  task="text-generation",
119
  max_new_tokens=500
120
  )
121
- return ChatHuggingFace(llm=llm, temperature=temperature)
 
122
 
123
- # Prompt Template
 
 
124
  prompt_template = PromptTemplate(
125
  template=(
126
  "Answer the question based on the context below.\n\n"
@@ -131,8 +139,11 @@ prompt_template = PromptTemplate(
131
  input_variables=["context", "question"]
132
  )
133
 
134
- # UI
135
- st.title("YouTube Transcript Chatbot")
 
 
 
136
 
137
  video_id = st.text_input("YouTube Video ID", value="lv1_-RER4_I")
138
  query = st.text_area("Your Query", value="What is RAG?")
@@ -144,38 +155,59 @@ language_code = None
144
  if video_id:
145
  with st.spinner("Checking available languages..."):
146
  available_languages = get_available_languages(video_id)
147
-
148
  if available_languages:
149
  st.success(f"Found {len(available_languages)} language(s)")
150
  lang_options = {label: code for code, label in available_languages}
151
  selected_label = st.selectbox("Select Language", options=list(lang_options.keys()))
152
  language_code = lang_options[selected_label]
153
  else:
154
- st.warning("No languages found")
 
155
 
 
 
 
156
  if st.button("Run Chatbot"):
157
  if not video_id or not query or not language_code:
158
- st.warning("Please fill in all fields and select a language.")
159
  else:
160
  with st.spinner("Fetching transcript..."):
161
  transcript = get_transcript(video_id, language_code)
162
-
163
  if not transcript:
164
- st.error("Could not fetch transcript.")
165
  else:
166
- st.success(f"Transcript fetched ({len(transcript)} characters).")
167
-
168
- with st.spinner("Generating response..."):
169
  retriever = create_vector_store(transcript).as_retriever(
170
  search_type="mmr",
171
  search_kwargs={"k": 5}
172
  )
173
  relevant_docs = retriever.invoke(query)
174
  context_text = "\n\n".join(doc.page_content for doc in relevant_docs)
175
- prompt = prompt_template.format(context=context_text, question=query)
176
-
177
- model = build_model(model_choice, temperature)
178
- response = model.invoke(prompt)
179
- response_text = response.content if hasattr(response, 'content') else str(response)
180
-
181
- st.text_area("Response", value=response_text, height=400)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  )
94
  return FAISS.from_documents(docs, embeddings)
95
 
96
+ # -------------------------------------------------
97
+ # 3️⃣ Model Builder
98
+ # -------------------------------------------------
99
  def build_model(model_choice, temperature=0.7):
100
+ """Return the correct model and a flag indicating if it’s chat-based."""
101
  if model_choice == "Flan-T5 (Free)":
102
  llm = HuggingFaceEndpoint(
103
  repo_id="google/flan-t5-base",
 
105
  max_new_tokens=500,
106
  temperature=temperature
107
  )
108
+ return llm, False # (model, is_chat)
109
+
110
  elif model_choice == "DeepSeek":
111
  llm = HuggingFaceEndpoint(
112
  repo_id="deepseek-ai/DeepSeek-V3.2-Exp",
 
114
  task="text-generation",
115
  max_new_tokens=500
116
  )
117
+ return ChatHuggingFace(llm=llm, temperature=temperature), True
118
+
119
  elif model_choice == "OpenAI":
120
  llm = HuggingFaceEndpoint(
121
  repo_id="openai/gpt-oss-20b",
 
123
  task="text-generation",
124
  max_new_tokens=500
125
  )
126
+ return ChatHuggingFace(llm=llm, temperature=temperature), True
127
+
128
 
129
+ # -------------------------------------------------
130
+ # 4️⃣ Prompt Template
131
+ # -------------------------------------------------
132
  prompt_template = PromptTemplate(
133
  template=(
134
  "Answer the question based on the context below.\n\n"
 
139
  input_variables=["context", "question"]
140
  )
141
 
142
+
143
# -------------------------------------------------
# 5️⃣ Streamlit App UI
# -------------------------------------------------
st.title("🎬 YouTube Transcript Chatbot (RAG)")

# Pre-filled defaults so the demo runs end-to-end without any typing.
video_id = st.text_input(label="YouTube Video ID", value="lv1_-RER4_I")
query = st.text_area(label="Your Query", value="What is RAG?")
 
155
# Once a video ID is entered, look up which caption languages exist and let
# the user pick one; `language_code` stays None until a choice is made.
if video_id:
    with st.spinner("Checking available languages..."):
        available_languages = get_available_languages(video_id)

    if not available_languages:
        st.warning("No languages found for this video.")
    else:
        st.success(f"Found {len(available_languages)} language(s)")
        # Map the human-readable label shown in the selectbox back to its code.
        label_to_code = {label: code for code, label in available_languages}
        chosen = st.selectbox("Select Language", options=list(label_to_code.keys()))
        language_code = label_to_code[chosen]
 
168
# -------------------------------------------------
# 6️⃣ Run Chatbot
# -------------------------------------------------
if st.button("Run Chatbot"):
    # All three inputs are required before we can answer anything.
    if not video_id or not query or not language_code:
        st.warning("⚠️ Please fill in all fields and select a language.")
    else:
        with st.spinner("Fetching transcript..."):
            transcript = get_transcript(video_id, language_code)

        if not transcript:
            st.error("Could not fetch transcript.")
        else:
            st.success(f"Transcript fetched ({len(transcript)} characters).")

            # Build the FAISS index and pull the top-k relevant chunks.
            # MMR retrieval keeps the chunks diverse rather than near-duplicates.
            with st.spinner("Creating knowledge base..."):
                retriever = create_vector_store(transcript).as_retriever(
                    search_type="mmr",
                    search_kwargs={"k": 5},
                )
                relevant_docs = retriever.invoke(query)
                context_text = "\n\n".join(doc.page_content for doc in relevant_docs)

            prompt = prompt_template.format(context=context_text, question=query)

            with st.spinner(f"Generating response using {model_choice}..."):
                model, is_chat = build_model(model_choice, temperature)

                try:
                    # `.invoke` is the supported Runnable entry point for both
                    # chat models and plain LLMs; the old `model(prompt)`
                    # __call__ form is deprecated in langchain-core, and the
                    # HF-pipeline-style `[{"generated_text": ...}]` result it
                    # was unpacked into is never returned by a LangChain LLM
                    # (LLM .invoke returns a plain string).
                    response = model.invoke(prompt)
                    if is_chat and hasattr(response, "content"):
                        # Chat models return an AIMessage-like object.
                        response_text = response.content
                    else:
                        # Plain LLMs (e.g. the Flan-T5 endpoint) return a string.
                        response_text = str(response)

                    st.text_area("🧠 Model Response", value=response_text, height=400)
                except Exception as e:
                    # Boundary handler: surface endpoint/auth/network failures
                    # in the UI instead of crashing the Streamlit script.
                    st.error(f"Model generation failed: {e}")