import os
import torch
import torch.nn as nn
import torch.optim as optim
import streamlit as st
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# === Neural Network for Chatbot ===
class ChatBotNN(nn.Module):
    """Simple two-layer feed-forward network with a ReLU in between.

    Maps an ``input_dim`` feature vector to ``output_dim`` logits via a
    single hidden layer of size ``hidden_dim``.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # Two affine layers; ReLU is kept as a module attribute so it
        # shows up in the model's repr and module tree.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        self.activation = nn.ReLU()

    def forward(self, x):
        """Run the forward pass: fc1 -> ReLU -> fc2."""
        hidden = self.activation(self.fc1(x))
        return self.fc2(hidden)
# === Helper Functions ===
def process_text_file(file_path):
    """
    Process a plain text file into a list of sentences.

    Each non-blank line in the text file is treated as one sentence;
    lines are stripped of surrounding whitespace.

    Args:
        file_path: Path to a UTF-8 encoded text file.

    Returns:
        list[str]: Stripped, non-empty lines in file order.
    """
    # Explicit UTF-8: relying on the platform default encoding (e.g.
    # cp1252 on Windows) can mangle or reject uploaded files.
    with open(file_path, "r", encoding="utf-8") as f:
        # Iterate the file directly instead of readlines() — no need to
        # materialize the whole file as a list first.
        return [line.strip() for line in f if line.strip()]
def process_csv_file(file_path):
    """
    Process a CSV file into a list of sentences.

    Assumes the CSV has a column named 'text'; NaN entries are dropped.

    Raises:
        ValueError: If the CSV lacks a 'text' column.
    """
    frame = pd.read_csv(file_path)
    # Guard clause: fail fast when the expected column is absent.
    if 'text' not in frame.columns:
        raise ValueError("CSV file must have a 'text' column.")
    return frame['text'].dropna().tolist()
# === Training Data ===
# In-memory knowledge base: one training sentence per entry, grown by
# train_bot() each time a file is uploaded. Not persisted across restarts.
corpus = []
# Shared TF-IDF vectorizer; re-fitted on the full corpus after every upload
# and reused by generate_response() for similarity lookups.
vectorizer = TfidfVectorizer()
def train_bot(file_path, file_type):
    """
    Train the chatbot by adding content from the uploaded file to the corpus.

    Args:
        file_path: Path to the uploaded file on disk.
        file_type: File extension, either "txt" or "csv".

    Raises:
        ValueError: If file_type is neither "txt" nor "csv".
    """
    global corpus
    if file_type == "txt":
        corpus += process_text_file(file_path)
    elif file_type == "csv":
        corpus += process_csv_file(file_path)
    else:
        raise ValueError("Unsupported file type. Use .txt or .csv.")
    # Re-fit the vectorizer on the updated corpus. Skip the fit while the
    # corpus is still empty (e.g. the upload held only blank lines):
    # TfidfVectorizer.fit raises "empty vocabulary" on an empty list.
    if corpus:
        vectorizer.fit(corpus)
def generate_response(user_input):
    """
    Generate a chatbot response based on the trained corpus using cosine similarity.

    Returns the corpus sentence most similar to ``user_input``, or a
    fallback prompt when nothing has been learned yet.
    """
    # Nothing learned yet — ask the user to upload training data.
    if not corpus:
        return "I don't know much yet. Please upload some files to teach me!"
    # Project the query and every known sentence into TF-IDF space.
    query_vec = vectorizer.transform([user_input])
    sentence_vecs = vectorizer.transform(corpus)
    # Reply with the sentence closest to the query by cosine similarity.
    scores = cosine_similarity(query_vec, sentence_vecs)
    best_idx = scores.argmax()
    return corpus[best_idx]
# === Streamlit App ===
# Flat Streamlit script: page header, file-upload/training section, then a
# simple single-turn chat box. Runs top-to-bottom on every interaction.
st.title("Chatbot Trainer with File Uploads")
st.write("""
### How it Works:
1. Upload `.txt` or `.csv` files to teach the chatbot.
- **.txt**: Each line represents one training sentence.
- **.csv**: Must have a column named `text` for training sentences.
2. Interact with the chatbot in real-time.
3. Watch the chatbot improve as you train it with more files!
""")
uploaded_file = st.file_uploader("Upload a file (.txt or .csv)", type=["txt", "csv"])
if uploaded_file is not None:
    # Persist the upload under ./uploads so the training helpers can
    # re-read it from disk by path.
    file_path = os.path.join("uploads", uploaded_file.name)
    os.makedirs("uploads", exist_ok=True)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    # Train the chatbot on the uploaded file; the extension is taken from
    # the filename and passed through as the file_type.
    st.write("Training the chatbot with the uploaded file...")
    file_extension = uploaded_file.name.split(".")[-1]
    train_bot(file_path, file_extension)
    st.success("Training complete!")
# Chat Interface — single-turn: each input is answered independently
# via TF-IDF nearest-neighbour lookup in the corpus.
st.write("### Chat with the Bot!")
user_input = st.text_input("You:", placeholder="Type something to chat...")
if user_input:
    response = generate_response(user_input)
    st.write(f"**Bot:** {response}")