Spaces:

Daksh0505
/

Youtube-Chatbot

Sleeping

App Files Community

Daksh0505 committed on Oct 10

Commit

63a076a

verified ·

1 Parent(s): 5e7cee3

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -22

app.py CHANGED Viewed

@@ -93,8 +93,11 @@ def create_vector_store(transcript):
     )
     return FAISS.from_documents(docs, embeddings)
-# Build Model
 def build_model(model_choice, temperature=0.7):
     if model_choice == "Flan-T5 (Free)":
         llm = HuggingFaceEndpoint(
             repo_id="google/flan-t5-base",
@@ -102,7 +105,8 @@ def build_model(model_choice, temperature=0.7):
             max_new_tokens=500,
             temperature=temperature
         )
-        return ChatHuggingFace(llm=llm)
     elif model_choice == "DeepSeek":
         llm = HuggingFaceEndpoint(
             repo_id="deepseek-ai/DeepSeek-V3.2-Exp",
@@ -110,7 +114,8 @@ def build_model(model_choice, temperature=0.7):
             task="text-generation",
             max_new_tokens=500
         )
-        return ChatHuggingFace(llm=llm, temperature=temperature)
     elif model_choice == "OpenAI":
         llm = HuggingFaceEndpoint(
             repo_id="openai/gpt-oss-20b",
@@ -118,9 +123,12 @@ def build_model(model_choice, temperature=0.7):
             task="text-generation",
             max_new_tokens=500
         )
-        return ChatHuggingFace(llm=llm, temperature=temperature)
-# Prompt Template
 prompt_template = PromptTemplate(
     template=(
         "Answer the question based on the context below.\n\n"
@@ -131,8 +139,11 @@ prompt_template = PromptTemplate(
     input_variables=["context", "question"]
 )
-# UI
-st.title("YouTube Transcript Chatbot")
 video_id = st.text_input("YouTube Video ID", value="lv1_-RER4_I")
 query = st.text_area("Your Query", value="What is RAG?")
@@ -144,38 +155,59 @@ language_code = None
 if video_id:
     with st.spinner("Checking available languages..."):
         available_languages = get_available_languages(video_id)
     if available_languages:
         st.success(f"Found {len(available_languages)} language(s)")
         lang_options = {label: code for code, label in available_languages}
         selected_label = st.selectbox("Select Language", options=list(lang_options.keys()))
         language_code = lang_options[selected_label]
     else:
-        st.warning("No languages found")
 if st.button("Run Chatbot"):
     if not video_id or not query or not language_code:
-        st.warning("Please fill in all fields and select a language.")
     else:
         with st.spinner("Fetching transcript..."):
             transcript = get_transcript(video_id, language_code)
             if not transcript:
-                st.error("Could not fetch transcript.")
             else:
-                st.success(f"Transcript fetched ({len(transcript)} characters).")
-                with st.spinner("Generating response..."):
                     retriever = create_vector_store(transcript).as_retriever(
                         search_type="mmr",
                         search_kwargs={"k": 5}
                     )
                     relevant_docs = retriever.invoke(query)
                     context_text = "\n\n".join(doc.page_content for doc in relevant_docs)
-                    prompt = prompt_template.format(context=context_text, question=query)
-                    model = build_model(model_choice, temperature)
-                    response = model.invoke(prompt)
-                    response_text = response.content if hasattr(response, 'content') else str(response)
-                    st.text_area("Response", value=response_text, height=400)

     )
     return FAISS.from_documents(docs, embeddings)
+# -------------------------------------------------
+# 3️⃣ Model Builder
+# -------------------------------------------------
 def build_model(model_choice, temperature=0.7):
+    """Return the correct model and a flag indicating if it’s chat-based."""
     if model_choice == "Flan-T5 (Free)":
         llm = HuggingFaceEndpoint(
             repo_id="google/flan-t5-base",
             max_new_tokens=500,
             temperature=temperature
         )
+        return llm, False  # (model, is_chat)
     elif model_choice == "DeepSeek":
         llm = HuggingFaceEndpoint(
             repo_id="deepseek-ai/DeepSeek-V3.2-Exp",
             task="text-generation",
             max_new_tokens=500
         )
+        return ChatHuggingFace(llm=llm, temperature=temperature), True
     elif model_choice == "OpenAI":
         llm = HuggingFaceEndpoint(
             repo_id="openai/gpt-oss-20b",
             task="text-generation",
             max_new_tokens=500
         )
+        return ChatHuggingFace(llm=llm, temperature=temperature), True
+# -------------------------------------------------
+# 4️⃣ Prompt Template
+# -------------------------------------------------
 prompt_template = PromptTemplate(
     template=(
         "Answer the question based on the context below.\n\n"
     input_variables=["context", "question"]
 )
+# -------------------------------------------------
+# 5️⃣ Streamlit App UI
+# -------------------------------------------------
+st.title("🎬 YouTube Transcript Chatbot (RAG)")
 video_id = st.text_input("YouTube Video ID", value="lv1_-RER4_I")
 query = st.text_area("Your Query", value="What is RAG?")
 if video_id:
     with st.spinner("Checking available languages..."):
         available_languages = get_available_languages(video_id)
     if available_languages:
         st.success(f"Found {len(available_languages)} language(s)")
         lang_options = {label: code for code, label in available_languages}
         selected_label = st.selectbox("Select Language", options=list(lang_options.keys()))
         language_code = lang_options[selected_label]
     else:
+        st.warning("No languages found for this video.")
+# -------------------------------------------------
+# 6️⃣ Run Chatbot
+# -------------------------------------------------
 if st.button("Run Chatbot"):
     if not video_id or not query or not language_code:
+        st.warning("⚠️ Please fill in all fields and select a language.")
     else:
         with st.spinner("Fetching transcript..."):
             transcript = get_transcript(video_id, language_code)
             if not transcript:
+                st.error("❌ Could not fetch transcript.")
             else:
+                st.success(f"✅ Transcript fetched ({len(transcript)} characters).")
+                with st.spinner("Creating knowledge base..."):
                     retriever = create_vector_store(transcript).as_retriever(
                         search_type="mmr",
                         search_kwargs={"k": 5}
                     )
                     relevant_docs = retriever.invoke(query)
                     context_text = "\n\n".join(doc.page_content for doc in relevant_docs)
+                prompt = prompt_template.format(context=context_text, question=query)
+                with st.spinner(f"Generating response using {model_choice}..."):
+                    model, is_chat = build_model(model_choice, temperature)
+                    try:
+                        if is_chat:
+                            # DeepSeek & OpenAI (chat-based)
+                            response = model.invoke(prompt)
+                            response_text = (
+                                response.content if hasattr(response, "content") else str(response)
+                            )
+                        else:
+                            # Flan-T5 (non-chat)
+                            response = model(prompt)
+                            if isinstance(response, list) and "generated_text" in response[0]:
+                                response_text = response[0]["generated_text"]
+                            else:
+                                response_text = str(response)
+                        st.text_area("🧠 Model Response", value=response_text, height=400)
+                    except Exception as e:
+                        st.error(f"Model generation failed: {e}")