# chat_with_video / app.py
# (Hugging Face Space file-viewer header preserved as a comment:
#  uploaded by tensorboy0101 — "Update app.py" — commit 217d3b5, verified.
#  As raw text these lines were syntax errors and would prevent the file
#  from being imported or run.)
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from langchain_openai import ChatOpenAI
from langchain_huggingface import HuggingFaceEmbeddings
#from langchain.chat_models import ChatOpenAI
#from langchain_openai import OpenAIEmbeddings
#from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser
from langchain_community.vectorstores import FAISS
#from langchain_community.chat_models import ChatOpenAI
#from langchain_openai import OpenAIEmbeddings
from langchain.prompts import PromptTemplate
import streamlit as st
from dotenv import load_dotenv
import os
# --- Configuration & model setup -------------------------------------------

# Load variables from a local .env file FIRST, so that the os.environ
# lookups below (e.g. HF_API_KEY) can actually see them.  Previously
# load_dotenv() ran *after* the lookup, so .env values were silently ignored.
load_dotenv()

# Outbound proxy used for the YouTube transcript request (the host this
# app runs on may be rate-limited/blocked by YouTube).
proxies = {
    "http": "http://139.59.1.14:80",
    "https": "http://139.59.1.14:80",
}

# Hugging Face token read from the environment / .env file.
# NOTE(review): not referenced anywhere below — presumably intended for the
# embedding model or gated repos; confirm whether it is still needed.
hf_token = os.environ.get("HF_API_KEY")

st.title("🎥 Chat with YouTube Video")

# Hard-coded video to chat with.
video_id = 'gb262LDH1So'

# Transcript chunking: ~1000-character windows with 200-character overlap.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# Sentence-level embeddings backing the FAISS vector store.
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Chat model.  `llm` and `llm_cht` are kept as aliases of the same object
# for backward compatibility with existing references (the chain below
# uses `llm_cht`).
llm = llm_cht = ChatOpenAI(
    model="meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
    temperature=0.7,
)

# Parses the chat model's message output down to a plain string.
parser = StrOutputParser()
# Prompt that constrains the model to answer strictly from the retrieved
# transcript context (and admit ignorance otherwise).
_QA_TEMPLATE = """
You are a helpful assistant.
Answer ONLY from the provided transcript context.
If the context is insufficient, just say you don't know.
{context}
Question: {question}
"""

prompt = PromptTemplate(
    template=_QA_TEMPLATE,
    input_variables=["context", "question"],
)
def format_docs(retrieved_docs):
    """Concatenate retrieved documents' text into a single context string.

    Each document's ``page_content`` is taken verbatim; documents are
    separated by one blank line.
    """
    pieces = [document.page_content for document in retrieved_docs]
    return "\n\n".join(pieces)
if video_id:
    # Fetch the English transcript.  TranscriptsDisabled is raised when the
    # video owner has turned captions off — it is imported at the top of the
    # file but was never handled, so such videos crashed the app with a raw
    # traceback.  Show a friendly message and halt the script run instead.
    try:
        transcript_list = YouTubeTranscriptApi.get_transcript(
            video_id, languages=["en"], proxies=proxies
        )
    except TranscriptsDisabled:
        st.error("Captions are disabled for this video, so there is no transcript to chat with.")
        st.stop()

    # Flatten the per-caption snippets into one plain-text transcript.
    transcript = " ".join(chunk["text"] for chunk in transcript_list)

    # Split the transcript into overlapping chunks for embedding.
    chunks = splitter.create_documents([transcript])

    # Embed the chunks and index them in an in-memory FAISS store.
    vector_store = FAISS.from_documents(chunks, embedding)

    # Retriever returning the 4 most similar chunks per query.
    retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 4})

    question = st.text_input("Ask something about the video")
    if question:
        # Run retrieval (formatted into `context`) and pass the raw question
        # through in parallel, then feed both into prompt -> LLM -> parser.
        parallel_chain = RunnableParallel({
            'context': retriever | RunnableLambda(format_docs),
            'question': RunnablePassthrough()
        })
        main_chain = parallel_chain | prompt | llm_cht | parser
        result = main_chain.invoke(question)
        st.write("🧠 Answer:", result)