emaaaa543 committed on
Commit
597c937
·
verified ·
1 Parent(s): 0b994e3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -21
app.py CHANGED
@@ -32,24 +32,10 @@ vector_store = Chroma(
32
  embedding_function=hf_embeddings,
33
  )
34
 
35
# Define function to split transcripts into chunks
def split_transcript(transcript, max_chunk_size=10000):
    """Split a transcript string into chunks of at most ~max_chunk_size characters.

    Splitting happens only on newline boundaries, so a chunk may exceed
    max_chunk_size only when a single line is itself longer than the limit.

    Args:
        transcript: Full transcript text (newline-separated lines).
        max_chunk_size: Soft upper bound on chunk length, in characters.

    Returns:
        List of non-empty chunk strings covering the whole transcript;
        empty list for an empty transcript.
    """
    chunks = []
    current_chunk = ""
    for line in transcript.split("\n"):
        if len(current_chunk) + len(line) > max_chunk_size:
            # Chunk is full: flush it and start a new one with this line.
            # Guard against flushing an empty chunk (happens when the very
            # first line alone exceeds max_chunk_size — the original code
            # appended "" to chunks in that case).
            if current_chunk:
                chunks.append(current_chunk)
            current_chunk = line
        else:
            # Join with "\n" only when the chunk already has content, so
            # chunks never begin with a spurious leading newline (the
            # original prepended "\n" even to the first accumulated line).
            current_chunk = f"{current_chunk}\n{line}" if current_chunk else line
    if current_chunk:
        chunks.append(current_chunk)
    return chunks
48
-
49
  # Load and process YouTube video
50
- loader = YoutubeLoader.from_youtube_url("https://youtu.be/9UTQd3Oo6Kw?si=xJ9rM3gK4ERTH9c5", add_video_info=True)
51
- transcript = loader.load() # Assume this loads the transcript
52
- data = split_transcript(transcript)
53
 
54
  tokenizer = tiktoken.get_encoding('p50k_base')
55
 
@@ -86,11 +72,15 @@ def get_embedding(text):
86
  return hf_embeddings.embed_query(text)
87
 
88
  # Define Gradio interface function
89
- def query_model(user_input):
90
  try:
91
  # Call the function for user query vector embeddings
92
- raw_query_embedding = get_embedding(user_input)
93
-
 
 
 
 
94
  # Perform similarity search with vector store
95
  results = vector_store.similarity_search_by_vector(
96
  embedding=raw_query_embedding, k=1
@@ -103,7 +93,7 @@ def query_model(user_input):
103
  "<CONTEXT>\n" +
104
  "\n\n-------\n\n".join(contexts) +
105
  "\n-------\n</CONTEXT>\n\n\n\nMY QUESTION:\n" +
106
- user_input
107
  )
108
 
109
  # Call to Groq or Hugging Face model for completion
 
32
  embedding_function=hf_embeddings,
33
  )
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  # Load and process YouTube video
36
+ loader = YoutubeLoader.from_youtube_url("https://www.youtube.com/watch?v=e-gwvmhyU7A", add_video_info=True)
37
+ data = loader.load() # Assume this loads the transcript
38
+
39
 
40
  tokenizer = tiktoken.get_encoding('p50k_base')
41
 
 
72
  return hf_embeddings.embed_query(text)
73
 
74
  # Define Gradio interface function
75
+ def query_model(messages):
76
  try:
77
  # Call the function for user query vector embeddings
78
+ if isinstance(messages, list) and len(messages) > 0:
79
+ latest_message = messages[-1]['content']
80
+ else:
81
+ return "No messages provided or invalid format."
82
+
83
+ raw_query_embedding= get_embedding(latest_message)
84
  # Perform similarity search with vector store
85
  results = vector_store.similarity_search_by_vector(
86
  embedding=raw_query_embedding, k=1
 
93
  "<CONTEXT>\n" +
94
  "\n\n-------\n\n".join(contexts) +
95
  "\n-------\n</CONTEXT>\n\n\n\nMY QUESTION:\n" +
96
+ messages
97
  )
98
 
99
  # Call to Groq or Hugging Face model for completion