Daksh0505 committed on
Commit
e0df4a5
·
verified ·
1 Parent(s): 6edb192

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -37
app.py CHANGED
@@ -1,35 +1,56 @@
1
  import streamlit as st
2
  from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFaceEmbeddings
3
- from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
4
  from langchain.text_splitter import RecursiveCharacterTextSplitter
5
  from langchain_community.vectorstores import FAISS
6
  from langchain.prompts import PromptTemplate
7
  import os
 
8
 
9
  api_key = os.getenv("HF_API_KEY")
 
10
 
11
- # 📼 Transcript Language Options
12
  @st.cache_data
13
- def get_available_languages(video_id):
14
- transcriber = YouTubeTranscriptApi()
 
 
 
 
 
 
 
 
15
  try:
16
- transcript_info = transcriber.list(video_id)
17
- return [(t.language_code, t.language) for t in transcript_info]
18
- except Exception:
19
- return []
20
-
21
- # 📼 Transcript Fetcher
22
- @st.cache_data
23
- def get_transcript(video_id, language_code):
24
- transcriber = YouTubeTranscriptApi()
25
- try:
26
- transcript_list = transcriber.fetch(video_id, languages=[language_code])
27
- return ' '.join([d.text for d in transcript_list])
28
- except (NoTranscriptFound, TranscriptsDisabled):
29
- return None
30
- except Exception:
31
  return None
32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  # 🧠 Embedding Loader
34
  @st.cache_resource
35
  def load_embeddings():
@@ -73,32 +94,49 @@ prompt_template = PromptTemplate(
73
  # 🚀 App UI
74
  st.title("πŸŽ₯ YouTube Transcript Chatbot")
75
 
76
- video_id = st.text_input("YouTube Video ID", value="lv1_-RER4_I")
77
- if video_id:
78
- langs = get_available_languages(video_id)
79
- lang_options = [f"{name} ({code})" for code, name in langs] if langs else ["No transcript available"]
80
- selected_lang = st.selectbox("Transcript Language", lang_options)
81
- language_code = selected_lang.split("(")[-1].strip(")") if langs else None
82
- else:
83
- language_code = None
 
 
 
 
84
 
85
  query = st.text_area("Your Query", value="What is RAG?")
86
  model_choice = st.radio("Model to Use", ["DeepSeek", "OpenAI"])
87
  temperature = st.slider("Temperature", 0, 100, value=50)
88
 
89
  if st.button("πŸš€ Run Chatbot"):
90
- if not video_id or not query or not language_code:
91
  st.warning("Please fill in all fields.")
92
  else:
93
- with st.spinner("Fetching transcript and generating response..."):
94
  transcript = get_transcript(video_id, language_code)
 
95
  if not transcript:
96
- st.error("Transcript not available or disabled.")
97
  else:
98
- retriever = create_vector_store(transcript).as_retriever(search_type="mmr", search_kwargs={"k": 5})
99
- relevant_docs = retriever.invoke(query)
100
- context_text = "\n\n".join(doc.page_content for doc in relevant_docs)
101
- prompt = prompt_template.invoke({"context": context_text, "question": query})
102
- model = build_model(model_choice, temperature / 100.0)
103
- response = model.invoke(prompt)
104
- st.text_area("Model Response", value=response.content, height=400)
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFaceEmbeddings
 
3
  from langchain.text_splitter import RecursiveCharacterTextSplitter
4
  from langchain_community.vectorstores import FAISS
5
  from langchain.prompts import PromptTemplate
6
  import os
7
+ import requests
8
 
9
  api_key = os.getenv("HF_API_KEY")
10
+ RAPIDAPI_KEY = os.getenv("RAPIDAPI_KEY", "your-rapidapi-key-here")
11
 
12
+ # 📼 Transcript Fetcher using RapidAPI
13
  @st.cache_data
14
+ def get_transcript(video_id, language_code="en"):
15
+ url = "https://youtube-transcript3.p.rapidapi.com/api/transcript"
16
+
17
+ querystring = {"video_id": video_id, "lang": language_code}
18
+
19
+ headers = {
20
+ "x-rapidapi-key": RAPIDAPI_KEY,
21
+ "x-rapidapi-host": "youtube-transcript3.p.rapidapi.com"
22
+ }
23
+
24
  try:
25
+ response = requests.get(url, headers=headers, params=querystring, timeout=10)
26
+ if response.status_code == 200:
27
+ data = response.json()
28
+ # Combine transcript text
29
+ if isinstance(data, list):
30
+ return ' '.join([item.get('text', '') for item in data])
31
+ return None
32
+ else:
33
+ st.error(f"API Error: {response.status_code}")
34
+ return None
35
+ except Exception as e:
36
+ st.error(f"Error: {str(e)}")
 
 
 
37
  return None
38
 
39
+ # 📼 Get Available Languages (simplified - try common ones)
40
def get_available_languages():
    """Return the fixed list of offered transcript languages.

    Returns:
        A new list of ``(language_code, language_name)`` tuples, in display
        order. The list is hard-coded; nothing is queried per video.
    """
    common_languages = {
        "en": "English",
        "es": "Spanish",
        "fr": "French",
        "de": "German",
        "hi": "Hindi",
        "zh": "Chinese",
        "ja": "Japanese",
        "ko": "Korean",
        "pt": "Portuguese",
        "ru": "Russian",
    }
    # Dicts keep insertion order, so the pairs come out in the order above.
    return list(common_languages.items())
53
+
54
  # 🧠 Embedding Loader
55
  @st.cache_resource
56
  def load_embeddings():
 
94
  # 🚀 App UI
95
  st.title("πŸŽ₯ YouTube Transcript Chatbot")
96
 
97
# Sidebar: points the user at where to obtain a RapidAPI key.
with st.sidebar:
    st.subheader("⚙️ API Setup")
    st.info("Using RapidAPI for transcripts")
    st.markdown("[Get your free API key](https://rapidapi.com/ytjar/api/youtube-transcript3)")

# Strip surrounding whitespace so a blank-looking or padded ID is not sent
# to the transcript service.
video_id = st.text_input(
    "YouTube Video ID",
    value="lv1_-RER4_I",
    help="Example: dQw4w9WgXcQ from youtube.com/watch?v=dQw4w9WgXcQ",
).strip()

# The selectbox holds the (code, name) pairs themselves and only formats them
# for display, so the code never has to be parsed back out of the label.
language_code, _language_name = st.selectbox(
    "Transcript Language",
    get_available_languages(),
    format_func=lambda lang: f"{lang[1]} ({lang[0]})",
)

query = st.text_area("Your Query", value="What is RAG?").strip()
model_choice = st.radio("Model to Use", ["DeepSeek", "OpenAI"])
# The slider works in whole percent; it is scaled to 0.0-1.0 when the model is built.
temperature = st.slider("Temperature", 0, 100, value=50)

if st.button("🚀 Run Chatbot"):
    if not video_id or not query:
        st.warning("Please fill in all fields.")
    else:
        with st.spinner("Fetching transcript..."):
            transcript = get_transcript(video_id, language_code)

        if not transcript:
            st.error("Could not fetch transcript. Make sure the video ID is correct and has captions.")
        else:
            st.success(f"✅ Transcript fetched! ({len(transcript)} characters)")

            with st.spinner("Generating response..."):
                # Retrieve the 5 most relevant transcript chunks using MMR search.
                retriever = create_vector_store(transcript).as_retriever(
                    search_type="mmr",
                    search_kwargs={"k": 5},
                )
                relevant_docs = retriever.invoke(query)
                context_text = "\n\n".join(doc.page_content for doc in relevant_docs)

                prompt = prompt_template.invoke({
                    "context": context_text,
                    "question": query,
                })

                model = build_model(model_choice, temperature / 100.0)
                response = model.invoke(prompt)

            st.text_area("Model Response", value=response.content, height=400)