Spaces:

emaaaa543
/

testing-space

Runtime error

App Files Community

emaaaa543 committed on Aug 15, 2024

Commit

597c937

verified ·

1 Parent(s): 0b994e3

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -21

app.py CHANGED Viewed

@@ -32,24 +32,10 @@ vector_store = Chroma(
     embedding_function=hf_embeddings,
 )
-# Define function to split transcripts into chunks
-def split_transcript(transcript, max_chunk_size=10000):
-    chunks = []
-    current_chunk = ""
-    for line in transcript.split("\n"):
-        if len(current_chunk) + len(line) > max_chunk_size:
-            chunks.append(current_chunk)
-            current_chunk = line
-        else:
-            current_chunk += "\n" + line
-    if current_chunk:
-        chunks.append(current_chunk)
-    return chunks
 # Load and process YouTube video
-loader = YoutubeLoader.from_youtube_url("https://youtu.be/9UTQd3Oo6Kw?si=xJ9rM3gK4ERTH9c5", add_video_info=True)
-transcript = loader.load()  # Assume this loads the transcript
-data = split_transcript(transcript)
 tokenizer = tiktoken.get_encoding('p50k_base')
@@ -86,11 +72,15 @@ def get_embedding(text):
     return hf_embeddings.embed_query(text)
 # Define Gradio interface function
-def query_model(user_input):
     try:
         # Call the function for user query vector embeddings
-        raw_query_embedding = get_embedding(user_input)
         # Perform similarity search with vector store
         results = vector_store.similarity_search_by_vector(
             embedding=raw_query_embedding, k=1
@@ -103,7 +93,7 @@ def query_model(user_input):
             "<CONTEXT>\n" +
             "\n\n-------\n\n".join(contexts) +
             "\n-------\n</CONTEXT>\n\n\n\nMY QUESTION:\n" +
-            user_input
         )
         # Call to Groq or Hugging Face model for completion

     embedding_function=hf_embeddings,
 )
 # Load and process YouTube video
+loader = YoutubeLoader.from_youtube_url("https://www.youtube.com/watch?v=e-gwvmhyU7A", add_video_info=True)
+data = loader.load()  # Assume this loads the transcript
 tokenizer = tiktoken.get_encoding('p50k_base')
     return hf_embeddings.embed_query(text)
 # Define Gradio interface function
+def query_model(messages):
     try:
         # Call the function for user query vector embeddings
+        if isinstance(messages, list) and len(messages) > 0:
+            latest_message = messages[-1]['content']
+        else:
+            return "No messages provided or invalid format."
+        raw_query_embedding= get_embedding(latest_message)
         # Perform similarity search with vector store
         results = vector_store.similarity_search_by_vector(
             embedding=raw_query_embedding, k=1
             "<CONTEXT>\n" +
             "\n\n-------\n\n".join(contexts) +
             "\n-------\n</CONTEXT>\n\n\n\nMY QUESTION:\n" +
+            messages
         )
         # Call to Groq or Hugging Face model for completion