import os
import torch
import torch.nn as nn
import torch.optim as optim
import streamlit as st
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# === Neural Network for Chatbot ===
class ChatBotNN(nn.Module):
    """Simple two-layer feed-forward network with a ReLU in between.

    Maps an ``input_dim`` feature vector to ``output_dim`` logits via a
    single hidden layer of size ``hidden_dim``.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # Two affine layers; ReLU is kept as a module attribute so it
        # shows up in the model's repr and module tree.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        self.activation = nn.ReLU()

    def forward(self, x):
        """Run the forward pass: fc1 -> ReLU -> fc2."""
        hidden = self.activation(self.fc1(x))
        return self.fc2(hidden)
# === Helper Functions ===
def process_text_file(file_path):
    """
    Process a plain text file into a list of sentences.

    Each non-blank line in the text file is treated as one sentence;
    lines are stripped of surrounding whitespace.

    Args:
        file_path: Path to a UTF-8 encoded text file.

    Returns:
        list[str]: Stripped, non-empty lines in file order.
    """
    # Explicit UTF-8: relying on the platform default encoding (e.g.
    # cp1252 on Windows) can mangle or reject uploaded files.
    with open(file_path, "r", encoding="utf-8") as f:
        # Iterate the file directly instead of readlines() — no need to
        # materialize the whole file as a list first.
        return [line.strip() for line in f if line.strip()]
def process_csv_file(file_path):
    """
    Process a CSV file into a list of sentences.

    Assumes the CSV has a column named 'text'; NaN entries are dropped.

    Raises:
        ValueError: If the CSV lacks a 'text' column.
    """
    frame = pd.read_csv(file_path)
    # Guard clause: fail fast when the expected column is absent.
    if 'text' not in frame.columns:
        raise ValueError("CSV file must have a 'text' column.")
    return frame['text'].dropna().tolist()
# === Training Data ===
# In-memory knowledge base: one training sentence per entry, grown by
# train_bot() each time a file is uploaded. Not persisted across restarts.
corpus = []
# Shared TF-IDF vectorizer; re-fitted on the full corpus after every upload
# and reused by generate_response() for similarity lookups.
vectorizer = TfidfVectorizer()
def train_bot(file_path, file_type):
    """
    Train the chatbot by adding content from the uploaded file to the corpus.

    Args:
        file_path: Path to the uploaded file on disk.
        file_type: File extension, either "txt" or "csv".

    Raises:
        ValueError: If file_type is neither "txt" nor "csv".
    """
    global corpus
    if file_type == "txt":
        corpus += process_text_file(file_path)
    elif file_type == "csv":
        corpus += process_csv_file(file_path)
    else:
        raise ValueError("Unsupported file type. Use .txt or .csv.")
    # Re-fit the vectorizer on the updated corpus. Skip the fit while the
    # corpus is still empty (e.g. the upload held only blank lines):
    # TfidfVectorizer.fit raises "empty vocabulary" on an empty list.
    if corpus:
        vectorizer.fit(corpus)
def generate_response(user_input):
    """
    Generate a chatbot response based on the trained corpus using cosine similarity.

    Returns the corpus sentence most similar to ``user_input``, or a
    fallback prompt when nothing has been learned yet.
    """
    # Nothing learned yet — ask the user to upload training data.
    if not corpus:
        return "I don't know much yet. Please upload some files to teach me!"
    # Project the query and every known sentence into TF-IDF space.
    query_vec = vectorizer.transform([user_input])
    sentence_vecs = vectorizer.transform(corpus)
    # Reply with the sentence closest to the query by cosine similarity.
    scores = cosine_similarity(query_vec, sentence_vecs)
    best_idx = scores.argmax()
    return corpus[best_idx]
# === Streamlit App ===
# Flat Streamlit script: page header, file-upload/training section, then a
# simple single-turn chat box. Runs top-to-bottom on every interaction.
st.title("Chatbot Trainer with File Uploads")
st.write("""
### How it Works:
1. Upload `.txt` or `.csv` files to teach the chatbot.
- **.txt**: Each line represents one training sentence.
- **.csv**: Must have a column named `text` for training sentences.
2. Interact with the chatbot in real-time.
3. Watch the chatbot improve as you train it with more files!
""")
uploaded_file = st.file_uploader("Upload a file (.txt or .csv)", type=["txt", "csv"])
if uploaded_file is not None:
    # Persist the upload under ./uploads so the training helpers can
    # re-read it from disk by path.
    file_path = os.path.join("uploads", uploaded_file.name)
    os.makedirs("uploads", exist_ok=True)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    # Train the chatbot on the uploaded file; the extension is taken from
    # the filename and passed through as the file_type.
    st.write("Training the chatbot with the uploaded file...")
    file_extension = uploaded_file.name.split(".")[-1]
    train_bot(file_path, file_extension)
    st.success("Training complete!")
# Chat Interface — single-turn: each input is answered independently
# via TF-IDF nearest-neighbour lookup in the corpus.
st.write("### Chat with the Bot!")
user_input = st.text_input("You:", placeholder="Type something to chat...")
if user_input:
    response = generate_response(user_input)
    st.write(f"**Bot:** {response}")