"""Streamlit RAG demo: retrieve relevant corpus passages with FAISS and
generate a grounded answer with FLAN-T5.

Pipeline: SentenceTransformer embeddings -> exact-L2 FAISS index ->
top-k retrieval -> FLAN-T5 conditional generation, wrapped in a
Streamlit UI.
"""

import faiss
import numpy as np
import streamlit as st
from sentence_transformers import SentenceTransformer
from transformers import T5ForConditionalGeneration, T5Tokenizer


@st.cache_resource
def _load_embedder():
    """Load the sentence-embedding model once per Streamlit server process."""
    return SentenceTransformer('all-MiniLM-L6-v2')


@st.cache_resource
def _load_flan_t5():
    """Load the FLAN-T5 tokenizer and model once.

    The original code re-downloaded/re-loaded both inside
    generate_response() on every button click, which is extremely slow;
    caching preserves behavior while removing the per-query cost.
    """
    tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")
    model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base")
    return tokenizer, model


# SentenceTransformer embedder (kept under the original module-level name).
model = _load_embedder()

# Demo corpus — swap in a real dataset (e.g. Wikipedia passages).
corpus = ["Article text 1", "Article text 2", "Article text 3"]

# Embed the corpus and build an exact L2 FAISS index over the vectors.
encoded_texts = model.encode(corpus, convert_to_numpy=True)
encoded_texts = np.asarray(encoded_texts, dtype=np.float32)  # FAISS requires float32
dimension = encoded_texts.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(encoded_texts)


def retrieve(query, k=5):
    """Return up to `k` corpus documents most similar to `query`.

    `k` is clamped to the corpus size: FAISS pads missing neighbors with
    index -1, and the original list-index `corpus[i]` silently aliased
    that to the LAST document, returning duplicates.
    """
    k = min(k, len(corpus))
    query_vector = model.encode([query], convert_to_numpy=True)
    query_vector = np.asarray(query_vector, dtype=np.float32)
    _distances, indices = index.search(query_vector, k)
    return [corpus[i] for i in indices[0] if i >= 0]


def generate_response(query):
    """Generate a human-like FLAN-T5 answer grounded in retrieved context.

    Returns the decoded model output as a plain string.
    """
    retrieved_docs = retrieve(query)
    context = " ".join(retrieved_docs)

    flan_t5_tokenizer, flan_t5_model = _load_flan_t5()

    # NOTE: the original source broke this f-string across a physical
    # line (a syntax error); the prompt text is preserved on one line.
    input_text = f"Generate a human-like response: {query}. Context: {context}"
    input_ids = flan_t5_tokenizer(
        input_text, return_tensors="pt", truncation=True, max_length=512
    ).input_ids

    # max_length counts TOKENS here; trim_to_word_limit() enforces the
    # word-level cap on the decoded text afterwards.
    generated_ids = flan_t5_model.generate(input_ids, max_length=1500)
    return flan_t5_tokenizer.decode(generated_ids[0], skip_special_tokens=True)


def trim_to_word_limit(text, word_limit=1500):
    """Truncate `text` to at most `word_limit` whitespace-separated words."""
    words = text.split()
    if len(words) > word_limit:
        return " ".join(words[:word_limit])
    return text


# ---------------------------- Streamlit UI ----------------------------
st.title("Humanized Text Generator with RAG")

query = st.text_input("Enter your query:")

if st.button("Generate"):
    # Guard: don't run retrieval/generation on an empty prompt.
    if not query.strip():
        st.warning("Please enter a query first.")
    else:
        with st.spinner("Generating response..."):
            response = generate_response(query)
            response = trim_to_word_limit(response)
        st.write("### Generated Response:")
        st.write(response)

st.write(
    "This app uses FAISS, SentenceTransformers, and FLAN-T5 to generate "
    "contextually relevant human-like responses."
)