import os

import gradio as gr
import tiktoken
from dotenv import load_dotenv
from groq import Groq
from langchain_chroma import Chroma
from langchain_community.document_loaders import YoutubeLoader
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Load environment variables
load_dotenv()
groq_api_key = os.getenv("GROQ_API_KEY")
os.environ["USER_AGENT"] = "RAG-chat-app"

client = Groq(api_key=groq_api_key)

primer = """You are a personal assistant. Answer any questions I have about
the YouTube video provided. Translate into a specific language if the user
asks you to."""

# Initialize Hugging Face embeddings
hf_embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Initialize ChromaDB vector store
vector_store = Chroma(
    collection_name="data_collection",
    embedding_function=hf_embeddings,
)

# Load the YouTube video transcript
loader = YoutubeLoader.from_youtube_url(
    "https://www.youtube.com/watch?v=e-gwvmhyU7A", add_video_info=True
)
data = loader.load()

# Token-based length function so chunk sizes are measured in tokens, not characters
tokenizer = tiktoken.get_encoding("p50k_base")

def tiktoken_len(text):
    tokens = tokenizer.encode(text, disallowed_special=())
    return len(tokens)

# Initialize text splitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2000,
    chunk_overlap=100,
    length_function=tiktoken_len,
    separators=["\n\n", "\n", " ", ""],
)

# Split the transcript into chunks
texts = text_splitter.split_documents(data)

# Prefix each chunk with its source and title, then store the documents in ChromaDB
documents = [
    Document(
        page_content=(
            f"Source: {t.metadata['source']}, Title: {t.metadata['title']}"
            f"\n\nContent: {t.page_content}"
        ),
        metadata=t.metadata,
    )
    for t in texts
]
doc_ids = vector_store.add_documents(documents=documents)

# Embed a query string with the same Hugging Face model used for indexing
def get_embedding(text):
    return hf_embeddings.embed_query(text)

# Gradio interface function
def query_model(messages):
    try:
        # Expect an OpenAI-style message array; answer the latest user message
        if isinstance(messages, list) and len(messages) > 0:
            latest_message = messages[-1]["content"]
        else:
            return "No messages provided or invalid format."

        # Embed the user query
        raw_query_embedding = get_embedding(latest_message)

        # Retrieve the most similar chunk from the vector store
        results = vector_store.similarity_search_by_vector(
            embedding=raw_query_embedding, k=1
        )
        contexts = [doc.page_content for doc in results]

        # Prepend the retrieved context to the question (RAG prompt)
        augmented_query = (
            "\n"
            + "\n\n-------\n\n".join(contexts)
            + "\n-------\n\n\n\n\nMY QUESTION:\n"
            + latest_message
        )

        # Call the Groq model for the completion
        response = client.chat.completions.create(
            model="llama3-8b-8192",
            messages=[
                {"role": "system", "content": primer},
                {"role": "user", "content": augmented_query},
            ],
            max_tokens=1000,
            temperature=1.2,
        )
        return response.choices[0].message.content
    except Exception as e:
        return str(e)

# Create Gradio interface
iface = gr.Interface(
    fn=query_model,
    inputs=gr.JSON(label="Enter array of messages (JSON format)"),
    outputs=gr.Textbox(label="Response"),
    title="RAG Model",
    description="Retrieve and generate responses from a YouTube video transcript.",
)
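# Example input for the JSON field above (an illustrative payload; query_model
# only reads the last message's "content"):
#
#   [
#       {"role": "user", "content": "What is the video about?"},
#       {"role": "user", "content": "Summarize it in three bullet points."}
#   ]
#
# Quick smoke test without the UI (assumes the vector store above has been
# populated and GROQ_API_KEY is set in .env):
#
#   print(query_model([{"role": "user", "content": "What is the video about?"}]))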
if __name__ == "__main__":
    iface.launch()
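# Calling the running app from another process (a minimal sketch using
# gradio_client; "/predict" is Gradio's default endpoint name for a
# single-function Interface, so adjust if your setup differs):
#
#   from gradio_client import Client
#
#   remote = Client("http://127.0.0.1:7860/")
#   answer = remote.predict(
#       [{"role": "user", "content": "What is the video about?"}],
#       api_name="/predict",
#   )
#   print(answer)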