import os import torch import torch.nn as nn import torch.optim as optim import streamlit as st import pandas as pd from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.metrics.pairwise import cosine_similarity # === Neural Network for Chatbot === class ChatBotNN(nn.Module): def __init__(self, input_dim, hidden_dim, output_dim): super(ChatBotNN, self).__init__() self.fc1 = nn.Linear(input_dim, hidden_dim) self.fc2 = nn.Linear(hidden_dim, output_dim) self.activation = nn.ReLU() def forward(self, x): x = self.activation(self.fc1(x)) x = self.fc2(x) return x # === Helper Functions === def process_text_file(file_path): """ Process a plain text file into a list of sentences. Each line in the text file is treated as one sentence. """ with open(file_path, "r") as f: lines = f.readlines() return [line.strip() for line in lines if line.strip()] def process_csv_file(file_path): """ Process a CSV file into a list of sentences. Assumes the CSV has a column named 'text'. """ data = pd.read_csv(file_path) if 'text' in data.columns: return data['text'].dropna().tolist() else: raise ValueError("CSV file must have a 'text' column.") # === Training Data === corpus = [] vectorizer = TfidfVectorizer() def train_bot(file_path, file_type): """ Train the chatbot by adding content from the uploaded file to the corpus. """ global corpus if file_type == "txt": corpus += process_text_file(file_path) elif file_type == "csv": corpus += process_csv_file(file_path) else: raise ValueError("Unsupported file type. Use .txt or .csv.") # Fit the vectorizer to the updated corpus vectorizer.fit(corpus) def generate_response(user_input): """ Generate a chatbot response based on the trained corpus using cosine similarity. """ if not corpus: return "I don't know much yet. Please upload some files to teach me!" # Vectorize user input and the corpus user_vector = vectorizer.transform([user_input]) corpus_vectors = vectorizer.transform(corpus) # Compute cosine similarity similarities = cosine_similarity(user_vector, corpus_vectors) most_similar_idx = similarities.argmax() return corpus[most_similar_idx] # === Streamlit App === st.title("Chatbot Trainer with File Uploads") st.write(""" ### How it Works: 1. Upload `.txt` or `.csv` files to teach the chatbot. - **.txt**: Each line represents one training sentence. - **.csv**: Must have a column named `text` for training sentences. 2. Interact with the chatbot in real-time. 3. Watch the chatbot improve as you train it with more files! """) uploaded_file = st.file_uploader("Upload a file (.txt or .csv)", type=["txt", "csv"]) if uploaded_file is not None: # Save the uploaded file locally file_path = os.path.join("uploads", uploaded_file.name) os.makedirs("uploads", exist_ok=True) with open(file_path, "wb") as f: f.write(uploaded_file.getbuffer()) # Train the chatbot on the uploaded file st.write("Training the chatbot with the uploaded file...") file_extension = uploaded_file.name.split(".")[-1] train_bot(file_path, file_extension) st.success("Training complete!") # Chat Interface st.write("### Chat with the Bot!") user_input = st.text_input("You:", placeholder="Type something to chat...") if user_input: response = generate_response(user_input) st.write(f"**Bot:** {response}")