"""Streamlit app: chat with a YouTube video via RAG over its English transcript.

Pipeline: download transcript -> split into chunks -> embed with a local
sentence-transformers model -> FAISS similarity retriever -> LLM answers
questions grounded ONLY in the retrieved transcript context.
"""

import os

import streamlit as st
from dotenv import load_dotenv
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import (
    RunnableLambda,
    RunnableParallel,
    RunnablePassthrough,
)
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import ChatOpenAI
from youtube_transcript_api import TranscriptsDisabled, YouTubeTranscriptApi

# BUG FIX: load .env BEFORE reading environment variables. The original read
# HF_API_KEY first, so a token stored only in .env was always missed.
load_dotenv()

# Not referenced directly below; HuggingFace client libraries can pick the
# token up from the environment. Kept for parity with the original script.
hf_token = os.environ.get("HF_API_KEY")

# HTTP(S) proxy used when downloading the transcript (helps where YouTube
# blocks the host's IP).
proxies = {
    "http": "http://139.59.1.14:80",
    "https": "http://139.59.1.14:80",
}

st.title("🎥 Chat with YouTube Video")

video_id = "gb262LDH1So"

# ~1000-char chunks with 200-char overlap so retrieved context keeps
# sentence continuity across chunk boundaries.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# Local embedding model (no API key needed for embeddings).
embedding = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

llm_cht = ChatOpenAI(
    model="meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
    temperature=0.7,
)
llm = llm_cht  # backward-compatible alias kept from the original script

parser = StrOutputParser()

prompt = PromptTemplate(
    template="""
    You are a helpful assistant.
    Answer ONLY from the provided transcript context.
    If the context is insufficient, just say you don't know.

    {context}
    Question: {question}
    """,
    input_variables=["context", "question"],
)


def format_docs(retrieved_docs):
    """Join retrieved document chunks into a single context string.

    Args:
        retrieved_docs: iterable of LangChain Documents from the retriever.

    Returns:
        The documents' page_content joined with blank-line separators.
    """
    return "\n\n".join(doc.page_content for doc in retrieved_docs)


@st.cache_resource(show_spinner="Indexing transcript…")
def build_retriever(vid: str):
    """Fetch, chunk, and embed the transcript; return a FAISS retriever.

    Cached with st.cache_resource so the (expensive) transcript download and
    embedding run once per video id instead of on every Streamlit rerun.

    Raises:
        TranscriptsDisabled: if the video has transcripts turned off.
    """
    transcript_list = YouTubeTranscriptApi.get_transcript(
        vid, languages=["en"], proxies=proxies
    )
    transcript = " ".join(chunk["text"] for chunk in transcript_list)
    chunks = splitter.create_documents([transcript])
    vector_store = FAISS.from_documents(chunks, embedding)
    return vector_store.as_retriever(
        search_type="similarity", search_kwargs={"k": 4}
    )


if video_id:
    # BUG FIX: TranscriptsDisabled was imported but never handled — a video
    # with transcripts turned off previously crashed with a raw traceback.
    try:
        retriever = build_retriever(video_id)
    except TranscriptsDisabled:
        st.error("Transcripts are disabled for this video.")
        st.stop()

    question = st.text_input("Ask something about the video")
    if question:
        # Fan out: retrieve + format context in parallel with passing the
        # raw question through, then fill the prompt and call the LLM.
        parallel_chain = RunnableParallel(
            {
                "context": retriever | RunnableLambda(format_docs),
                "question": RunnablePassthrough(),
            }
        )
        main_chain = parallel_chain | prompt | llm_cht | parser
        result = main_chain.invoke(question)
        st.write("🧠 Answer:", result)