File size: 2,603 Bytes
7e1ed06
217d3b5
 
 
1530e95
217d3b5
7e1ed06
 
 
1530e95
 
217d3b5
188b55b
1530e95
 
7e1ed06
 
 
 
 
 
3476e99
 
 
 
1530e95
 
7e1ed06
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5e9e315
7e1ed06
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from langchain_openai import ChatOpenAI
from langchain_huggingface import HuggingFaceEmbeddings
#from langchain.chat_models import ChatOpenAI
#from langchain_openai import OpenAIEmbeddings
#from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser

from langchain_community.vectorstores import FAISS
#from langchain_community.chat_models import ChatOpenAI
#from langchain_openai import OpenAIEmbeddings


from langchain.prompts import PromptTemplate
import streamlit as st
from dotenv import load_dotenv
import os


# Outbound proxy for the YouTube transcript request — YouTube throttles or
# blocks many datacenter/cloud IPs, so requests are routed through this host.
# NOTE(review): 139.59.1.14:80 looks like a free/public proxy — confirm it is
# still reachable before deploying.
_PROXY_URL = "http://139.59.1.14:80"
proxies = {"http": _PROXY_URL, "https": _PROXY_URL}


# Load variables from a local .env file BEFORE reading any of them.
# The original read HF_API_KEY first, so a token defined only in .env
# always came back as None.
load_dotenv()

# Hugging Face API token; may be None if neither the shell environment
# nor .env defines HF_API_KEY.
hf_token = os.environ.get("HF_API_KEY")

# Streamlit page header.
st.title("🎥 Chat with YouTube Video")

# Hard-coded YouTube video ID (the value after "v=" in a watch URL).
video_id = 'gb262LDH1So'

# Chunker: ~1000-character pieces with 200-character overlap so sentences
# spanning a chunk boundary are not lost from the retrieved context.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# Local sentence-transformers model for embeddings (runs on-device,
# no API key needed for inference).
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2" )

# Chat model accessed through the OpenAI-compatible client; `llm` and
# `llm_cht` are two names for the same instance (only `llm_cht` is used
# in the chain below).
# NOTE(review): a Llama model via ChatOpenAI presumably relies on
# OPENAI_BASE_URL/OPENAI_API_KEY pointing at a Together-style endpoint —
# confirm the environment is configured accordingly.
llm = llm_cht = ChatOpenAI(
    model="meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
    temperature=0.7,
)

# Converts the chat model's AIMessage output into a plain string.
parser = StrOutputParser()

# Prompt that restricts the model to the retrieved transcript context.
prompt = PromptTemplate(
    template="""
      You are a helpful assistant.
      Answer ONLY from the provided transcript context.
      If the context is insufficient, just say you don't know.

      {context}
      Question: {question}
    """,
    input_variables = ['context', 'question']
)


def format_docs(retrieved_docs):
    """Concatenate the page_content of each retrieved document.

    Returns a single string with documents separated by a blank line,
    ready to be interpolated into the prompt's {context} slot.
    """
    pieces = [doc.page_content for doc in retrieved_docs]
    return "\n\n".join(pieces)


if video_id:
    # Fetch the English transcript through the proxy. TranscriptsDisabled
    # was imported but never handled — a video with captions turned off
    # crashed the app with a raw traceback; now it reports and stops.
    try:
        transcript_list = YouTubeTranscriptApi.get_transcript(
            video_id, languages=["en"], proxies=proxies
        )
    except TranscriptsDisabled:
        st.error("Transcripts are disabled for this video.")
        st.stop()

    # Flatten the per-caption chunks into one continuous string.
    transcript = " ".join(chunk["text"] for chunk in transcript_list)

    # Split the transcript into overlapping chunks for embedding.
    chunks = splitter.create_documents([transcript])

    # Embed every chunk and index them in an in-memory FAISS store.
    vector_store = FAISS.from_documents(chunks, embedding)

    # Retriever returning the 4 chunks most similar to the query.
    retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 4})

    question = st.text_input("Ask something about the video")

    if question:
        # Build the prompt inputs in parallel: the retriever fetches and
        # formats the relevant chunks for {context} while the raw question
        # passes straight through to {question}.
        parallel_chain = RunnableParallel({
            'context': retriever | RunnableLambda(format_docs),
            'question': RunnablePassthrough()
        })

        # retrieve -> fill prompt -> chat model -> plain string.
        main_chain = parallel_chain | prompt | llm_cht | parser

        result = main_chain.invoke(question)

        st.write("🧠 Answer:", result)