akshil-jain committed on
Commit
97c2b63
·
verified ·
1 Parent(s): 6df4a14

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +74 -14
  2. app.py +111 -0
  3. requirements.txt +8 -0
README.md CHANGED
@@ -1,14 +1,74 @@
1
- ---
2
- title: Video Transcript Chatbot
3
- emoji:
4
- colorFrom: pink
5
- colorTo: yellow
6
- sdk: gradio
7
- sdk_version: 5.34.1
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- short_description: RAG powered app that turns any YouTube video into a chatbot
12
- ---
13
-
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Video Transcript Chatbot
2
+
3
+ A beginner-friendly Gradio app that turns any YouTube video into a conversational chatbot using LangChain and the Hugging Face Inference API.
4
+
5
+ ---
6
+
7
+ ## Features
8
+
9
+ - **Dynamic Video Input**: Paste a full YouTube URL or raw video ID.
10
+ - **Embedding Model Selection**: Pick any HF embedding model (default: `sentence-transformers/all-MiniLM-L6-v2`).
11
+ - **LLM Model Selection**: Choose any HF text-generation model (default: `meta-llama/Llama-3.1-8B-Instruct`).
12
+ - **Secure Token Entry**: You must enter your own HF API token at runtime—no hard-coded defaults.
13
+ - **Conversational Memory**: Multi-turn chat history is preserved.
14
+ - **Retrieval-Augmented Generation**: Uses FAISS + transcript context to ground answers.
15
+
16
+ ---
17
+
18
+ ## Prerequisites
19
+
20
+ - **Python 3.8+**
21
+ - **Hugging Face API Token** with Inference access:
22
+ https://huggingface.co/settings/tokens
23
+ - **Git** (for cloning the repo)
24
+
25
+ ---
26
+
27
+ ## Installation
28
+
29
+ 1. **Clone the repo**
30
+ ```bash
31
+ git clone https://github.com/<your-username>/yt-rag-chatbot.git
32
+ cd yt-rag-chatbot
33
+ 2. **(Optional) Create a virtual environment**
34
+ ```bash
35
+ python -m venv venv
36
+ source venv/bin/activate # macOS/Linux
37
+ venv\Scripts\activate # Windows
38
+ 3. **Install dependencies**
39
+ ```bash
40
+ pip install -r requirements.txt
41
+ ```
42
+
43
+
44
+ ## Usage
45
+ 1. **Start the app:**
46
+ ```bash
47
+ python app.py
48
+ 2. **Open** your browser at the local URL (e.g. http://127.0.0.1:7860)
49
+ 3. **Use the UI:**
50
+
51
+ - **YouTube Video URL or ID:** Paste your link/ID.
52
+
53
+ - **Embedding Model:** Leave default or enter another HF embedding model.
54
+
55
+ - **LLM Model:** Enter your desired HF LLM repo.
56
+
57
+ - **Your HF API Token:** Paste your token (input hidden).
58
+
59
+ - Click **Initialize Chat** to load and index the transcript.
60
+
61
+ - Ask questions in the chat window to interact with the video content.
62
+
63
+
64
+ ## Customization
65
+
66
+ - **Default Models:** Edit the default values for embedding_model_input and llm_model_input in app.py.
67
+
68
+ - **Retrieval Size:** Change the k value in the retriever configuration:
69
+ ```python
70
+ retriever = vector_store.as_retriever(search_kwargs={'k': 4})
71
+
72
+
73
+
74
+
app.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import gradio as gr
4
+ from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
5
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
6
+ from langchain_huggingface import HuggingFaceEndpointEmbeddings, HuggingFaceEndpoint, ChatHuggingFace
7
+ from langchain_community.vectorstores import FAISS
8
+ from langchain.prompts import PromptTemplate
9
+ from langchain.memory import ConversationBufferMemory
10
+ from langchain.chains import ConversationalRetrievalChain
11
+
12
+ # No default token: user must supply their Hugging Face API token via the UI
13
+
14
def extract_video_id(url_or_id: str) -> str:
    """Return the YouTube video ID contained in *url_or_id*.

    The input may be a URL or a bare video ID. The first run of 11
    ID characters (letters, digits, ``_`` or ``-``) that directly follows
    ``v=`` or a ``/`` is treated as the ID. When no such run is found the
    (whitespace-stripped) input is returned unchanged, on the assumption
    that the caller already passed a raw ID.

    Args:
        url_or_id: A YouTube URL or a raw video ID, possibly padded with
            whitespace from copy/paste.

    Returns:
        The extracted 11-character ID, or the stripped input if nothing
        matched.
    """
    # Pasted values often carry leading/trailing spaces or a newline; without
    # stripping, a raw ID would be handed on with that whitespace attached.
    candidate = url_or_id.strip()
    match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", candidate)
    return match.group(1) if match else candidate
18
+
19
+ # Load, embed, and index the transcript
20
def load_vector_store(video_id: str, huggingface_token: str, embedding_model: str):
    """Fetch a video's English transcript, embed it, and index it in FAISS.

    Args:
        video_id: YouTube video ID whose transcript should be indexed.
        huggingface_token: Hugging Face API token used for the embedding
            endpoint; surrounding whitespace is stripped.
        embedding_model: Hugging Face model name passed to
            ``HuggingFaceEndpointEmbeddings``.

    Returns:
        A FAISS vector store built from 1000-character transcript chunks
        (200-character overlap).

    Raises:
        gr.Error: If transcripts are disabled for the video or the fetched
            transcript contains no text. Other transcript-API errors
            propagate unchanged.
    """
    token = huggingface_token.strip()
    # NOTE(review): kept from the original. This writes the user's token into
    # the process-wide environment and never removes it; on a shared
    # deployment the last user's token stays visible to the whole process.
    os.environ['HUGGINGFACEHUB_API_TOKEN'] = token

    try:
        transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
    except TranscriptsDisabled as err:
        # Previously this fell through with an empty transcript and failed
        # later while building the index; report the real cause instead.
        raise gr.Error('Transcripts are disabled for this video, so it cannot be indexed.') from err

    transcript = ' '.join(chunk['text'] for chunk in transcript_list)
    if not transcript.strip():
        raise gr.Error('The English transcript for this video is empty; there is nothing to index.')

    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    docs = splitter.create_documents([transcript])
    if not docs:
        # An index cannot be built from zero documents.
        raise gr.Error('The English transcript for this video is empty; there is nothing to index.')

    embeddings = HuggingFaceEndpointEmbeddings(
        model=embedding_model,
        huggingfacehub_api_token=token,
    )
    return FAISS.from_documents(docs, embeddings)
35
+
36
+ # Initialize/reinitialize the QA chain
37
def setup(video_input, embedding_model, llm_model, huggingface_token):
    """Build a fresh conversational retrieval chain for one video.

    Extracts the video ID, indexes the transcript, and creates a
    ``ConversationalRetrievalChain`` backed by a Hugging Face endpoint LLM
    with its own conversation memory.

    Args:
        video_input: YouTube URL or raw video ID entered by the user.
        embedding_model: Hugging Face embedding model name.
        llm_model: Hugging Face repo id of the text-generation model.
        huggingface_token: The user's Hugging Face API token.

    Returns:
        A 3-tuple ``([], [], qa_chain)``: an empty list for the chatbot
        display, an empty list for the stored chat history, and the new chain.

    Raises:
        gr.Error: If the video input or the token is blank.
    """
    token = (huggingface_token or '').strip()
    video_text = (video_input or '').strip()
    if not video_text:
        raise gr.Error('Please enter a YouTube video URL or ID.')
    if not token:
        raise gr.Error('Please enter your Hugging Face API token.')

    video_id = extract_video_id(video_text)
    vector_store = load_vector_store(video_id, token, embedding_model)
    retriever = vector_store.as_retriever(search_type='similarity', search_kwargs={'k': 4})

    # Answering prompt: restricts the model to the retrieved transcript chunks.
    prompt_template = (
        "\n"
        "You are a helpful assistant.\n"
        "Answer ONLY from the provided transcript context.\n"
        "If the context is insufficient, say you don't know.\n"
        "\n"
        "{context}\n"
        "Question: {question}\n"
    )
    prompt = PromptTemplate(template=prompt_template, input_variables=['context', 'question'])
    memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

    # Configure the LLM endpoint.
    # NOTE(review): the environment write is kept from the original in case a
    # downstream component reads it; it exposes the token process-wide.
    os.environ['HUGGINGFACEHUB_API_TOKEN'] = token
    hf_llm = HuggingFaceEndpoint(
        repo_id=llm_model,
        task='text-generation',
        max_new_tokens=512,
        temperature=0.2,
        huggingfacehub_api_token=token,
    )
    chat_model = ChatHuggingFace(llm=hf_llm, verbose=True)

    qa_chain = ConversationalRetrievalChain.from_llm(
        llm=chat_model,
        retriever=retriever,
        memory=memory,
        chain_type='stuff',
        # The prompt used to be built and then dropped; pass it to the
        # document-combining step so the transcript-only rule takes effect.
        combine_docs_chain_kwargs={'prompt': prompt},
        return_source_documents=False,
    )

    # Reset chat display and history alongside the new chain.
    return [], [], qa_chain
74
+
75
+ # Handle chat interactions
76
def respond(message, chat_history, qa_chain):
    """Answer one user message and append the exchange to the chat history.

    Args:
        message: The text the user submitted.
        chat_history: List of ``(user_message, answer)`` tuples; it is
            extended in place. ``None`` is treated as an empty history.
        qa_chain: The chain produced by ``setup``, or ``None`` if the chat has
            not been initialized yet.

    Returns:
        ``(chat_history, chat_history)`` - the same list twice, once for the
        chatbot display and once for the stored state.
    """
    if chat_history is None:
        chat_history = []

    # Nothing to ask: leave the conversation untouched instead of sending an
    # empty question to the model.
    if not (message or '').strip():
        return chat_history, chat_history

    # The user typed before pressing "Initialize Chat"; calling None would
    # raise TypeError, so answer with a hint instead.
    if qa_chain is None:
        chat_history.append(
            (message, 'Please click "Initialize Chat" before asking a question.')
        )
        return chat_history, chat_history

    # invoke() replaces the deprecated direct call on a chain.
    result = qa_chain.invoke({'question': message, 'chat_history': chat_history})
    answer = result.get('answer') or result.get('result')
    chat_history.append((message, answer))
    return chat_history, chat_history
81
+
82
+ # Gradio UI layout
83
def _clear_chat(qa_chain):
    """Reset the visible chat and the chain's own conversation memory.

    Clearing only the UI lists would leave earlier turns in the chain's
    memory object, where they would keep influencing later answers.
    """
    memory = getattr(qa_chain, 'memory', None)
    if memory is not None:
        memory.clear()
    return [], []


with gr.Blocks() as demo:
    gr.Markdown('# Video Transcript Chatbot')
    # NOTE(review): the original indentation was lost; grouping all inputs and
    # the button in one row is an assumption - adjust if the layout differs.
    with gr.Row():
        video_input = gr.Textbox(label='YouTube Video URL or ID', value='')
        embedding_model_input = gr.Textbox(
            label='Embedding Model (default: sentence-transformers/all-MiniLM-L6-v2)',
            value='sentence-transformers/all-MiniLM-L6-v2'
        )
        llm_model_input = gr.Textbox(label='LLM Model Repo (e.g. google/flan-t5-large)', value='meta-llama/Llama-3.1-8B-Instruct')
        token_input = gr.Textbox(label='Your HF API Token', placeholder='hf_...', type='password')
        init_btn = gr.Button('Initialize Chat')

    chatbot = gr.Chatbot()
    chat_state = gr.State([])    # list of (question, answer) tuples
    chain_state = gr.State(None)  # the chain built by setup(), None until then

    # Build the index and chain, and reset any previous conversation.
    init_btn.click(
        setup,
        inputs=[video_input, embedding_model_input, llm_model_input, token_input],
        outputs=[chatbot, chat_state, chain_state]
    )

    txt = gr.Textbox(placeholder='Ask a question about the video...', show_label=False)
    # Answer the question, then empty the input box so the previous question
    # does not have to be deleted by hand.
    txt.submit(
        respond, inputs=[txt, chat_state, chain_state], outputs=[chatbot, chat_state]
    ).then(lambda: '', inputs=None, outputs=txt)

    gr.Button('Clear Chat').click(_clear_chat, inputs=[chain_state], outputs=[chatbot, chat_state])

if __name__ == '__main__':
    demo.launch()  # pass share=True or host/port if needed
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ youtube-transcript-api
2
+ langchain-community
3
+ langchain-openai
4
+ faiss-cpu
5
+ tiktoken
6
+ python-dotenv
7
+ langchain-huggingface
8
+ gradio