Capstone_Project3

Sleeping

App Files Files Community

elinstallation committed on Aug 13, 2025

Commit

932d832

verified ·

1 Parent(s): 71bf6cb

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -153

app.py CHANGED Viewed

@@ -7,12 +7,12 @@ from sentence_transformers import SentenceTransformer
 import torch
 with open("poverty_and_education.txt", "r", encoding="utf-8") as file:
-  # Read the entire contents of the file and store it in a variable
-  poverty_and_education = file.read()
 with open("academic_tips_text.txt", "r", encoding="utf-8") as file:
-  # Read the entire contents of the file and store it in a variable
-  acadenic_tips_text = file.read()
 # Print the text below
@@ -22,30 +22,31 @@ print(poverty_and_education)
 ### STEP 3
 def preprocess_text(text):
-  # Strip extra whitespace from the beginning and the end of the text
-  cleaned_text = text.strip()
-  # Split the cleaned_text by every newline character (\n)
-  chunks = cleaned_text.split("\n")
-  # Create an empty list to store cleaned chunks
-  cleaned_chunks = []
-  # Write your for-in loop below to clean each chunk and add it to the cleaned_chunks list
-  for chunk in chunks:
-    stripped_chunk = chunk.strip()
-    if len(stripped_chunk) > 0:
-      cleaned_chunks.append(stripped_chunk)
-  # Print cleaned_chunks
-  print(cleaned_chunks)
-  # Print the length of cleaned_chunks
-  num_of_chunks = print(len(cleaned_chunks))
-  print(f"There are {num_of_chunks} amount of chunks")
-  # Return the cleaned_chunks
-  return cleaned_chunks
 # Load the pre-trained embedding model that converts text to vectors
@@ -53,17 +54,17 @@ model = SentenceTransformer('all-MiniLM-L6-v2')
 ### STEP 4
 def create_embeddings(text_chunks):
-  # Convert each text chunk into a vector embedding and store as a tensor
-  chunk_embeddings = model.encode(text_chunks, convert_to_tensor=True) # Replace ... with the text_chunks list
-  # Print the chunk embeddings
-  print(chunk_embeddings)
-  # Print the shape of chunk_embeddings
-  print(chunk_embeddings.shape)
-  # Return the chunk_embeddings
-  return chunk_embeddings
 # Call the create_embeddings function and store the result in a new chunk_embeddings variable
 #chunk_embeddings = create_embeddings(cleaned_chunks) # Complete this line
@@ -71,37 +72,37 @@ def create_embeddings(text_chunks):
 ###STEP 5
 # Define a function to find the most relevant text chunks for a given query, chunk_embeddings, and text_chunks
 def get_top_chunks(query, chunk_embeddings, text_chunks):
-  # Convert the query text into a vector embedding
-  query_embedding = model.encode(query, convert_to_tensor = True) # Complete this line
-  # Normalize the query embedding to unit length for accurate similarity comparison
-  query_embedding_normalized = query_embedding / query_embedding.norm()
-  # Normalize all chunk embeddings to unit length for consistent comparison
-  chunk_embeddings_normalized = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)
-  # Calculate cosine similarity between query and all chunks using matrix multiplication
-  similarities = torch.matmul(chunk_embeddings_normalized, query_embedding_normalized) # Complete this line
-  # Print the similarities
-  print(similarities)
-  # Find the indices of the 3 chunks with highest similarity scores
-  top_indices = torch.topk(similarities, k=3).indices
-  # Print the top indices
-  print(top_indices)
-  # Create an empty list to store the most relevant chunks
-  top_chunks = []
-  # Loop through the top indices and retrieve the corresponding text chunks
-  for i in top_indices:
-    relevant_info = cleaned_chunks[i]
-    top_chunks.append(relevant_info)
-  # Return the list of most relevant chunks
-  return top_chunks
 # Print the top results
 #print(top_results)
@@ -111,19 +112,18 @@ chunk_embeddings = create_embeddings(cleaned_chunks)
 client= InferenceClient("Qwen/Qwen2.5-7B-Instruct-1M")
 #defining role of AI and user
-# i moved it to the bottom
-#def respond(message,history):
- #   messages = [{"role": "assistant", "content": "You are a friendly chatbot."}]
-  #  if history:
-   #     messages.extend(history) #keep adding history
-    #messages.append({"role":"user", "content": message})
-    #response=client.chat_completion(messages, max_tokens=100) #capping how many words the LLM is allowed to generate as a respond (100 words)
-#    return response['choices'][0]['message']['content'].strip() #storing value of response in a readable format to display
 ### STEP 6
 # Call the preprocess_text function and store the result in a cleaned_chunks variable
@@ -134,100 +134,10 @@ print(top_results)
 #Defining chatbot giving user a UI to interact, see their conversation history, and see new messages using built in gr feature
 #ChatInterface requires at least one parameter(a function)
-#chatbot = gr.ChatInterface(respond,type="messages", title="AI Chatbot", theme="Taithrah/Minimal")
-# INTERFACE EDITS #
-custom_css = """
-#chatbox {background-color: #ffffff; border-radius: 10px; padding: 10px;}
-#chatbox .message.user {background-color: #EDE7F6; color: #4A148C; border-radius: 20px; padding: 10px; margin: 5px; max-width: 75%;}
-#chatbox .message.bot {background-color: #F3E5F5; color: #4A148C; border-radius: 20px; padding: 10px; margin: 5px; max-width: 75%;}
-#header {background-color: #8E24AA; color: white; padding: 12px; border-radius: 12px 12px 0 0; font-weight: bold;}
-/* Input bar test */
-.input-container {
-    display: flex;
-    align-items: center;
-    background-color: white;
-    border: 1px solid #ccc;
-    border-radius: 25px;
-    padding: 5px 10px;
-    width: 100%;
-}
-.input-container input {
-    border: none;
-    outline: none;
-    flex: 1;
-    font-size: 14px;
-}
-.input-container button {
-    background-color: #8E24AA;
-    color: white;
-    border: none;
-    border-radius: 50%;
-    width: 35px;
-    height: 35px;
-    cursor: pointer;
-}
-"""
-def respond(message, history):
-    # Prepare messages for the API
-    messages = [{"role": "assistant", "content": "You are a friendly chatbot."}]
-    if history:
-        # Convert Gradio history into API format
-        for user_msg, bot_msg in history:
-            messages.append({"role": "user", "content": user_msg})
-            messages.append({"role": "assistant", "content": bot_msg})
-    messages.append({"role": "user", "content": message})
-    # Call the API
-    response = client.chat_completion(messages, max_tokens=100)
-    assistant_reply = response['choices'][0]['message']['content'].strip()
-    # Return for Gradio
-    return history + [(message, assistant_reply)], ""
-with gr.Blocks(css=custom_css) as demo:
-    gr.HTML("<div id='header'>DivaBot</div>")
-    chatbot = gr.Chatbot(elem_id="chatbox", height=400)
-    # Hidden textbox to store the message
-    msg = gr.Textbox(visible=False)
-    # Visible custom input bar with send button
-    gr.HTML("""
-    <div class="input-container">
-        <input id="user-input" placeholder="Type your message..." />
-        <button id="send-btn">➤</button>
-    </div>
-    <script>
-        const sendBtn = document.getElementById('send-btn');
-        const userInput = document.getElementById('user-input');
-        sendBtn.onclick = () => {
-            const value = userInput.value;
-            if (value.trim() !== "") {
-                // Set the hidden Gradio textbox value
-                const textbox = document.querySelector('textarea');
-                textbox.value = value;
-                textbox.dispatchEvent(new Event('input', { bubbles: true }));
-                // Trigger submit
-                document.querySelector('textarea').closest('form').dispatchEvent(new Event('submit', { bubbles: true }));
-                userInput.value = "";
-            }
-        };
-        userInput.addEventListener("keypress", function(e) {
-            if (e.key === "Enter") {
-                sendBtn.click();
-                e.preventDefault();
-            }
-        });
-    </script>
-    """)
-    msg.submit(respond, [msg, chatbot], [chatbot, msg])
-demo.launch()
 #You may run into errors when you're trying different models. To see the error messages, set debug to True in launch()

 import torch
 with open("poverty_and_education.txt", "r", encoding="utf-8") as file:
+    # Read the full contents of poverty_and_education.txt into a single string
+    poverty_and_education = file.read()
 with open("academic_tips_text.txt", "r", encoding="utf-8") as file:
+    # Read the full contents of academic_tips_text.txt into a single string
+    # NOTE(review): "acadenic" looks like a typo for "academic"; the name is
+    # left unchanged because other parts of app.py may reference it - confirm
+    acadenic_tips_text = file.read()
 # Print the text below
 ### STEP 3
 def preprocess_text(text):
+    """Split *text* into non-empty, whitespace-trimmed lines.
+
+    The text is trimmed, split on newline characters, and every resulting
+    piece is trimmed again; pieces that end up empty are dropped. The chunk
+    list and its size are printed before the list is returned.
+    """
+    # Trim each line of the already-trimmed text; blank lines are filtered out
+    trimmed_lines = (line.strip() for line in text.strip().split("\n"))
+    cleaned_chunks = [line for line in trimmed_lines if line]
+    # Echo the chunks and their count so the preprocessing can be inspected
+    print(cleaned_chunks)
+    chunk_count = len(cleaned_chunks)
+    print(chunk_count)
+    print(f"There are {chunk_count} amount of chunks")
+    return cleaned_chunks
 # Load the pre-trained embedding model that converts text to vectors
 ### STEP 4
 def create_embeddings(text_chunks):
+    """Encode *text_chunks* with the module-level ``model`` and return the result.
+
+    The embeddings are requested as a tensor (``convert_to_tensor=True``);
+    the tensor and then its shape are printed before it is returned.
+    """
+    embeddings = model.encode(text_chunks, convert_to_tensor=True)
+    # Echo the values first, then the shape, for inspection
+    print(embeddings)
+    print(embeddings.shape)
+    return embeddings
 # Call the create_embeddings function and store the result in a new chunk_embeddings variable
 #chunk_embeddings = create_embeddings(cleaned_chunks) # Complete this line
 ###STEP 5
 # Define a function to find the most relevant text chunks for a given query, chunk_embeddings, and text_chunks
 def get_top_chunks(query, chunk_embeddings, text_chunks):
+    """Return the text chunks most similar to *query*, best match first.
+
+    Args:
+        query: Text to embed with the module-level ``model``.
+        chunk_embeddings: 2-D tensor with one embedding row per entry of
+            ``text_chunks``.
+        text_chunks: The chunks that ``chunk_embeddings`` was built from.
+
+    Returns:
+        Up to three entries of ``text_chunks`` ordered by descending cosine
+        similarity to the query; an empty list when there are no chunks.
+    """
+    # Nothing to rank; returning early also avoids calling topk on an empty tensor
+    if not text_chunks:
+        return []
+    # Convert the query text into a vector embedding
+    query_embedding = model.encode(query, convert_to_tensor=True)
+    # Scale the query and every chunk row to unit length so that the dot
+    # product below equals cosine similarity
+    query_embedding_normalized = query_embedding / query_embedding.norm()
+    chunk_embeddings_normalized = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)
+    similarities = torch.matmul(chunk_embeddings_normalized, query_embedding_normalized)
+    # Print the similarities
+    print(similarities)
+    # torch.topk raises when k exceeds the number of scores, so cap k at 3
+    k = min(3, similarities.shape[0])
+    top_indices = torch.topk(similarities, k=k).indices
+    # Print the top indices
+    print(top_indices)
+    # Index the text_chunks argument (not the global cleaned_chunks) so the
+    # returned text always matches the embeddings that were passed in
+    return [text_chunks[i] for i in top_indices.tolist()]
 # Print the top results
 #print(top_results)
 client= InferenceClient("Qwen/Qwen2.5-7B-Instruct-1M")
 #defining role of AI and user
+def respond(message, history):
+    """Generate the chatbot's reply to *message* given the prior *history*.
+
+    Args:
+        message: The latest user message.
+        history: Earlier conversation turns as a list of dicts with "role"
+            and "content" keys (the format used with ``type="messages"``);
+            may be empty or None.
+
+    Returns:
+        The model's reply text with surrounding whitespace stripped.
+    """
+    # Instructions for the model belong in a "system" message; sent with the
+    # "assistant" role they are treated as an earlier reply instead
+    messages = [{"role": "system", "content": "You are a friendly chatbot."}]
+    # Forward only role/content from earlier turns
+    # NOTE(review): Gradio history entries may carry extra keys (e.g. metadata)
+    # that the chat-completion endpoint might not accept - confirm
+    for turn in history or []:
+        messages.append({"role": turn["role"], "content": turn["content"]})
+    messages.append({"role": "user", "content": message})
+    # max_tokens caps the reply length at 100 tokens (tokens, not words)
+    response = client.chat_completion(messages, max_tokens=100)
+    # Extract the reply text of the first choice for display
+    return response["choices"][0]["message"]["content"].strip()
 ### STEP 6
 # Call the preprocess_text function and store the result in a cleaned_chunks variable
 #Defining chatbot giving user a UI to interact, see their conversation history, and see new messages using built in gr feature
 #ChatInterface requires at least one parameter(a function)
+# Build the chat UI around respond(); with type="messages" the history is
+# handed to respond() as a list of role/content dicts
+chatbot = gr.ChatInterface(respond,type="messages", title="AI Chatbot", theme="Taithrah/Minimal")
+# Start the Gradio app
+chatbot.launch()
 #You may run into errors when you're trying different models. To see the error messages, set debug to True in launch()