Spaces:
Runtime error
Runtime error
import logging
import os
import re

import gradio as gr
from langchain.chains import LLMChain
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from youtube_transcript_api import YouTubeTranscriptApi
def get_transcript(video_url):
    """Return the transcript of a YouTube video as one newline-joined string.

    Args:
        video_url: URL of the video. The ``watch?v=``, ``youtu.be/``,
            ``embed/``, ``e/``, ``v/``, ``shorts/`` and ``live/`` forms are
            recognised.

    Returns:
        The transcript text on success. Failures are reported as a message
        string rather than an exception:

        * ``"Invalid YouTube URL"`` when ``video_url`` is not a string or no
          11-character video ID can be found in it.
        * ``"Error fetching transcript: Unable to fetch subtitles."`` when the
          transcript lookup fails for any reason (the cause is logged).
    """
    # Anything that is not text cannot contain a video ID; say so directly
    # instead of letting re.search raise and be reported as a fetch failure.
    if not isinstance(video_url, str):
        return "Invalid YouTube URL"

    # Use a regular expression to extract the video ID from the YouTube URL
    video_id_match = re.search(
        r"(?:https?://)?(?:www\.)?(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?|shorts|live)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})",
        video_url,
    )
    if not video_id_match:
        return "Invalid YouTube URL"
    video_id = video_id_match.group(1)

    try:
        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            # Older youtube-transcript-api: static call returning a list of dicts
            segments = YouTubeTranscriptApi.get_transcript(video_id)
            lines = [segment["text"] for segment in segments]
        else:
            # Newer youtube-transcript-api: instance API yielding snippet objects
            fetched = YouTubeTranscriptApi().fetch(video_id)
            lines = [snippet.text for snippet in fetched]
        # Join the transcript text into a single string
        return "\n".join(lines)
    except Exception:
        # Deliberate best-effort boundary: callers expect a message string,
        # but keep the real cause in the logs so failures can be diagnosed.
        logging.getLogger(__name__).exception(
            "Could not fetch transcript for video %s", video_id
        )
        return "Error fetching transcript: Unable to fetch subtitles."
def create_db_from_video_url(video_url, api_key):
    """
    Build a FAISS vector store from the transcript of a YouTube video.

    The string returned by ``get_transcript(video_url)`` is wrapped in a single
    ``Document``, split with ``RecursiveCharacterTextSplitter`` (chunk_size=1000,
    chunk_overlap=100) and embedded with the "models/text-embedding-004" model
    using ``api_key``. Returns the resulting FAISS store.
    """
    embedder = GoogleGenerativeAIEmbeddings(
        model="models/text-embedding-004",
        google_api_key=api_key,
    )

    # NOTE(review): get_transcript reports failures as message strings, and
    # whatever it returns is indexed here as-is -- confirm this is intended.
    transcript_doc = Document(page_content=get_transcript(video_url))

    # The whole transcript cannot be handed to the model directly, so it is
    # broken into small overlapping chunks before embedding.
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    chunks = splitter.split_documents([transcript_doc])

    return FAISS.from_documents(chunks, embedding=embedder)
def get_response(video, request):
    """
    Answer a question about a YouTube video using Gemini ("gemini-1.5-flash").

    The video's transcript is indexed with ``create_db_from_video_url``, the
    five chunks most similar to the question are retrieved, and they are given
    to the chat model together with the question.

    Args:
        video: URL of the YouTube video.
        request: The user's question about the video.

    Returns:
        The model's answer as a string. When the URL or the question is blank,
        or the ``API_Key`` environment variable is not set, a short explanatory
        message is returned instead so the UI shows something actionable.
    """
    # Reject unusable input up front rather than failing deep inside the
    # embedding / retrieval calls with an opaque error.
    if not isinstance(video, str) or not video.strip():
        return "Please enter a YouTube video URL."
    if not isinstance(request, str) or not request.strip():
        return "Please enter a question about the video."

    api_key = os.environ.get("API_Key")
    if not api_key:
        return "The Gemini API key is not configured (environment variable 'API_Key')."

    db = create_db_from_video_url(video, api_key)
    docs = db.similarity_search(query=request, k=5)
    docs_content = " ".join([doc.page_content for doc in docs])

    chat = ChatGoogleGenerativeAI(
        model="gemini-1.5-flash",
        google_api_key=api_key,
        convert_system_message_to_human=True,
    )

    # system prompt: carries the retrieved transcript chunks
    template = (
        "\n"
        "You are an assistant that can answer questions about youtube videos based on\n"
        "video transcripts: {docs}\n"
        "Only use factual information from the transcript to answer the question.\n"
        "If you don't have enough information to answer the question, say \"I don't know\".\n"
        "Your Answers should be detailed.\n"
    )
    system_msg_prompt = SystemMessagePromptTemplate.from_template(template)

    # human prompt: carries the user's question
    human_template = "Answer the following questions: {question}"
    human_msg_prompt = HumanMessagePromptTemplate.from_template(human_template)

    chat_prompt = ChatPromptTemplate.from_messages(
        [system_msg_prompt, human_msg_prompt]
    )
    chain = LLMChain(llm=chat, prompt=chat_prompt)
    return chain.run(question=request, docs=docs_content)
# creating title, description for the web app
# NOTE(review): the emoji were mis-encoded (UTF-8 bytes shown as Greek
# letters) and have been restored as escapes so they survive re-encoding.
# The technologist and feather icons are unambiguous; the money-bag and
# newspaper icons are a best guess from the project names -- confirm.
title = "YouTube Video Assistant \U0001F9D1\u200D\U0001F4BB"
description = (
    "Answers to the Questions asked by the user on the specified YouTube video. (English Only).\n\n"
    "Click here to view [demo](https://huggingface.co/spaces/Kathir0011/YouTube_Video_Assistant/blob/main/README.md)."
)
article = (
    "Other Projects:<br/>"
    "\U0001F4B0 [Health Insurance Predictor](http://health-insurance-cost-predictor-k19.streamlit.app/)<br/>"
    "\U0001F4F0 [Fake News Detector](https://fake-news-detector-k19.streamlit.app/)<br/>"
    "\U0001FAB6 [Birds Classifier](https://huggingface.co/spaces/Kathir0011/Birds_Classification)"
)

# building the app: two text inputs (video URL, question) -> one text answer
youtube_video_assistant = gr.Interface(
    fn=get_response,
    inputs=[
        gr.Text(
            label="Enter the Youtube Video URL:",
            placeholder="Example: https://www.youtube.com/watch?v=MnDudvCyWpc",
        ),
        gr.Text(
            label="Enter your Question",
            placeholder="Example: What is the video about?",
        ),
    ],
    outputs=gr.TextArea(label="Answers using Gemini-1.5-flash:"),
    title=title,
    description=description,
    article=article,
)

# launching the web app
youtube_video_assistant.launch()