Daksh0505 committed on
Commit
70402ce
·
verified ·
1 Parent(s): 2b0fa65

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -30
app.py CHANGED
@@ -5,6 +5,7 @@ from langchain_community.vectorstores import FAISS
5
  from langchain.prompts import PromptTemplate
6
  from langchain.llms import HuggingFacePipeline
7
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
 
8
  import torch
9
  import os
10
  import requests
@@ -16,28 +17,39 @@ if not RAPIDAPI_KEY:
16
  st.error("❌ RAPIDAPI_KEY not set. Please add it in your environment variables.")
17
 
18
 
19
- # 📼 Transcript Fetcher
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  @st.cache_data
21
  def get_transcript(video_id, language_code="en"):
22
- url = "https://youtube-transcript3.p.rapidapi.com/api/transcript"
23
- querystring = {"videoId": video_id, "lang": language_code}
24
- headers = {
25
- "x-rapidapi-key": RAPIDAPI_KEY,
26
- "x-rapidapi-host": "youtube-transcript3.p.rapidapi.com"
27
- }
28
  try:
29
- response = requests.get(url, headers=headers, params=querystring, timeout=10)
30
- if response.status_code != 200:
31
- st.error(f"API Error: {response.status_code}")
32
- return None
33
- data = response.json()
34
- if data.get("success") and "transcript" in data:
35
- return ' '.join([item.get('text', '') for item in data["transcript"]])
36
- else:
37
- st.warning("Unexpected API response format")
38
- return None
39
  except Exception as e:
40
- st.error(f"Error: {str(e)}")
41
  return None
42
 
43
 
@@ -53,7 +65,7 @@ def create_vector_store(transcript):
53
  return FAISS.from_documents(docs, embeddings)
54
 
55
 
56
- # 🤖 Load Free Flan-T5 locally (Better than BLOOM)
57
  @st.cache_resource
58
  def load_flan_t5():
59
  model_name = "google/flan-t5-base"
@@ -71,10 +83,10 @@ def load_flan_t5():
71
  return HuggingFacePipeline(pipeline=pipe)
72
 
73
 
74
- # 🧩 Build model (handles endpoints + free local model)
75
  def build_model(model_choice, temperature=0.7):
76
  if model_choice == "DeepSeek":
77
- repo_id = "deepseek-ai/DeepSeek-V3.2-Exp" # paid
78
  llm = HuggingFaceEndpoint(
79
  repo_id=repo_id,
80
  huggingfacehub_api_token=api_key,
@@ -83,7 +95,7 @@ def build_model(model_choice, temperature=0.7):
83
  )
84
  return ChatHuggingFace(llm=llm, temperature=temperature)
85
  elif model_choice == "OpenAI":
86
- repo_id = "openai/gpt-oss-20b" # paid
87
  llm = HuggingFaceEndpoint(
88
  repo_id=repo_id,
89
  huggingfacehub_api_token=api_key,
@@ -92,10 +104,10 @@ def build_model(model_choice, temperature=0.7):
92
  )
93
  return ChatHuggingFace(llm=llm, temperature=temperature)
94
  else:
95
- return load_flan_t5() # free local Flan-T5
96
 
97
 
98
- # 🧾 Prompt Template (Simplified for T5)
99
  prompt_template = PromptTemplate(
100
  template=(
101
  "Answer the question based on the context below.\n\n"
@@ -108,19 +120,48 @@ prompt_template = PromptTemplate(
108
 
109
 
110
  # 🚀 Streamlit UI
111
- st.title("🎥 YouTube Transcript Chatbot (Hybrid: Free + Paid)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
 
113
- video_id = st.text_input("YouTube Video ID", value="lv1_-RER4_I")
114
- query = st.text_area("Your Query", value="What is RAG?")
 
 
115
  model_choice = st.radio("Model to Use", ["Flan-T5 (Free)", "DeepSeek", "OpenAI"])
116
- temperature = st.slider("Temperature", 0, 100, value=50) / 100.0
117
 
 
 
 
 
118
  if st.button("🚀 Run Chatbot"):
119
  if not video_id or not query:
120
  st.warning("Please fill in all fields.")
121
  else:
122
  with st.spinner("Fetching transcript..."):
123
- transcript = get_transcript(video_id)
124
  if not transcript:
125
  st.error("Could not fetch transcript.")
126
  else:
@@ -144,4 +185,14 @@ if st.button("πŸš€ Run Chatbot"):
144
  response_obj = model.invoke(prompt)
145
  response = response_obj.content if hasattr(response_obj, 'content') else str(response_obj)
146
 
147
- st.text_area("Model Response", value=response, height=400)
 
 
 
 
 
 
 
 
 
 
 
5
  from langchain.prompts import PromptTemplate
6
  from langchain.llms import HuggingFacePipeline
7
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
8
+ from youtube_transcript_api import YouTubeTranscriptApi
9
  import torch
10
  import os
11
  import requests
 
17
  st.error("❌ RAPIDAPI_KEY not set. Please add it in your environment variables.")
18
 
19
 
20
+ # 📋 List Available Languages
21
+ @st.cache_data
22
+ def list_available_languages(video_id):
23
+ """Fetch all available transcript languages for a video"""
24
+ try:
25
+ api = YouTubeTranscriptApi()
26
+ transcript_list = api.list(video_id)
27
+
28
+ languages = []
29
+ for transcript in transcript_list:
30
+ lang_code = transcript.language_code
31
+ lang_name = transcript.language
32
+ is_generated = transcript.is_generated
33
+ label = f"{lang_name} ({lang_code})" + (" - Auto-generated" if is_generated else " - Manual")
34
+ languages.append((lang_code, label))
35
+
36
+ return languages
37
+ except Exception as e:
38
+ st.warning(f"Could not fetch available languages: {e}")
39
+ return [("en", "English (en) - Default")]
40
+
41
+
42
+ # 📼 Transcript Fetcher (Using youtube_transcript_api)
43
  @st.cache_data
44
  def get_transcript(video_id, language_code="en"):
45
+ """Fetch transcript using youtube_transcript_api"""
 
 
 
 
 
46
  try:
47
+ api = YouTubeTranscriptApi()
48
+ transcript_list = api.fetch(video_id, languages=[language_code])
49
+ transcript = ' '.join([snippet.text for snippet in transcript_list])
50
+ return transcript
 
 
 
 
 
 
51
  except Exception as e:
52
+ st.error(f"Error fetching transcript: {str(e)}")
53
  return None
54
 
55
 
 
65
  return FAISS.from_documents(docs, embeddings)
66
 
67
 
68
+ # 🤖 Load Free Flan-T5 locally
69
  @st.cache_resource
70
  def load_flan_t5():
71
  model_name = "google/flan-t5-base"
 
83
  return HuggingFacePipeline(pipeline=pipe)
84
 
85
 
86
+ # 🧩 Build model
87
  def build_model(model_choice, temperature=0.7):
88
  if model_choice == "DeepSeek":
89
+ repo_id = "deepseek-ai/DeepSeek-V3.2-Exp"
90
  llm = HuggingFaceEndpoint(
91
  repo_id=repo_id,
92
  huggingfacehub_api_token=api_key,
 
95
  )
96
  return ChatHuggingFace(llm=llm, temperature=temperature)
97
  elif model_choice == "OpenAI":
98
+ repo_id = "openai/gpt-oss-20b"
99
  llm = HuggingFaceEndpoint(
100
  repo_id=repo_id,
101
  huggingfacehub_api_token=api_key,
 
104
  )
105
  return ChatHuggingFace(llm=llm, temperature=temperature)
106
  else:
107
+ return load_flan_t5()
108
 
109
 
110
+ # 🧾 Prompt Template
111
  prompt_template = PromptTemplate(
112
  template=(
113
  "Answer the question based on the context below.\n\n"
 
120
 
121
 
122
  # 🚀 Streamlit UI
123
+ st.title("🎥 YouTube Transcript Chatbot")
124
+
125
+ # Video ID input
126
+ video_id = st.text_input("YouTube Video ID", value="lv1_-RER4_I", help="Enter the video ID from YouTube URL")
127
+
128
+ # Language selection
129
+ language_code = "en"
130
+ if video_id:
131
+ with st.spinner("Checking available transcripts..."):
132
+ available_langs = list_available_languages(video_id)
133
+
134
+ if available_langs:
135
+ st.success(f"Found {len(available_langs)} available transcript(s)")
136
+
137
+ # Create dropdown with available languages
138
+ lang_options = {label: code for code, label in available_langs}
139
+ selected_label = st.selectbox(
140
+ "Select Transcript Language",
141
+ options=list(lang_options.keys()),
142
+ help="Choose from available transcripts for this video"
143
+ )
144
+ language_code = lang_options[selected_label]
145
+ else:
146
+ st.info("Using default English transcript")
147
+ language_code = "en"
148
 
149
+ # Query input
150
+ query = st.text_area("Your Query", value="What is RAG?", help="Ask a question about the video content")
151
+
152
+ # Model selection
153
  model_choice = st.radio("Model to Use", ["Flan-T5 (Free)", "DeepSeek", "OpenAI"])
 
154
 
155
+ # Temperature slider
156
+ temperature = st.slider("Temperature", 0, 100, value=50, help="Higher = more creative, Lower = more focused") / 100.0
157
+
158
+ # Run button
159
  if st.button("🚀 Run Chatbot"):
160
  if not video_id or not query:
161
  st.warning("Please fill in all fields.")
162
  else:
163
  with st.spinner("Fetching transcript..."):
164
+ transcript = get_transcript(video_id, language_code)
165
  if not transcript:
166
  st.error("Could not fetch transcript.")
167
  else:
 
185
  response_obj = model.invoke(prompt)
186
  response = response_obj.content if hasattr(response_obj, 'content') else str(response_obj)
187
 
188
+ st.text_area("Model Response", value=response, height=400)
189
+
190
+ # Sidebar with info
191
+ with st.sidebar:
192
+ st.header("ℹ️ About")
193
+ st.write("This chatbot analyzes YouTube videos using their transcripts.")
194
+ st.write("**Features:**")
195
+ st.write("- Auto-detect available languages")
196
+ st.write("- RAG-based Q&A")
197
+ st.write("- Multiple model options")
198
+ st.write("- Cached for performance")