# app.py — Humanized Text Generator with RAG
# (Hugging Face Space by "kinely"; last change: commit 505df3c, "Update app.py")
import streamlit as st
from sentence_transformers import SentenceTransformer
from transformers import T5Tokenizer, T5ForConditionalGeneration
import faiss
import numpy as np
# --- Retrieval setup -------------------------------------------------------
# Streamlit re-executes this whole script on every user interaction, so the
# heavy resources (embedding model, FAISS index) are wrapped in
# st.cache_resource to build them only once per server process instead of
# once per click.

@st.cache_resource(show_spinner=False)
def _load_embedder():
    """Load the SentenceTransformer embedding model (once per process)."""
    return SentenceTransformer('all-MiniLM-L6-v2')


@st.cache_resource(show_spinner=False)
def _build_faiss_index(texts):
    """Encode *texts* and store them in a FAISS flat L2 index.

    *texts* is a tuple (hashable, so it works as a cache key); returns the
    populated faiss.IndexFlatL2.
    """
    vectors = _load_embedder().encode(list(texts), convert_to_numpy=True)
    faiss_index = faiss.IndexFlatL2(vectors.shape[1])
    faiss_index.add(vectors)
    return faiss_index


# Module-level names kept for the rest of the script (retrieve() reads them).
model = _load_embedder()
# Demo corpus; replace with real documents (e.g. Wikipedia passages) in use.
corpus = ["Article text 1", "Article text 2", "Article text 3"]
index = _build_faiss_index(tuple(corpus))
# Function to retrieve top-k relevant documents from the corpus
def retrieve(query, k=5):
    """Return up to *k* corpus documents most similar to *query*.

    The query is embedded with the same model used to build the index and
    neighbours are ranked by L2 distance.

    Bug fixed: the default k=5 exceeds the 3-document demo index. FAISS pads
    missing result slots with index -1, and ``corpus[-1]`` then silently
    duplicated the last document. We clamp k to the index size and filter
    any out-of-range indices.
    """
    query_vector = model.encode([query], convert_to_numpy=True)
    # Never ask FAISS for more neighbours than the index actually holds.
    k = min(k, index.ntotal)
    if k <= 0:
        return []
    _, indices = index.search(query_vector, k)
    # Drop -1 padding (and any other out-of-range slot) defensively.
    return [corpus[i] for i in indices[0] if 0 <= i < len(corpus)]
# Function to generate a human-like response using the FLAN-T5 model
def generate_response(query):
retrieved_docs = retrieve(query)
context = " ".join(retrieved_docs)
# Load the FLAN-T5 model and tokenizer
flan_t5_tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")
flan_t5_model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base")
# Format the input for the model
input_text = f"Generate a human-like response: {query}. Context: {context}"
input_ids = flan_t5_tokenizer(input_text, return_tensors="pt", truncation=True, max_length=512).input_ids
# Generate text response with a length constraint
generated_ids = flan_t5_model.generate(input_ids, max_length=1500)
response = flan_t5_tokenizer.decode(generated_ids[0], skip_special_tokens=True)
return response
# Function to trim the generated text to a word limit
def trim_to_word_limit(text, word_limit=1500):
words = text.split()
if len(words) > word_limit:
return " ".join(words[:word_limit])
return text
# --- Streamlit UI ----------------------------------------------------------
st.title("Humanized Text Generator with RAG")

# Free-text query input.
query = st.text_input("Enter your query:")

if st.button("Generate"):
    if not query.strip():
        # Guard: don't run the full RAG pipeline on an empty/whitespace query.
        st.warning("Please enter a query before generating.")
    else:
        with st.spinner("Generating response..."):
            response = generate_response(query)
            response = trim_to_word_limit(response)
        st.write("### Generated Response:")
        st.write(response)

# About section.
st.write("This app uses FAISS, SentenceTransformers, and FLAN-T5 to generate contextually relevant human-like responses.")