kinely committed on
Commit
98bee49
·
verified ·
1 Parent(s): 46d4b9a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -0
app.py CHANGED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import streamlit as st
from transformers import T5ForConditionalGeneration, T5Tokenizer
from sentence_transformers import SentenceTransformer
import faiss
import torch

# Generator checkpoint used for the humanized-text rewriting.
model_name = "google/flan-t5-base"


@st.cache_resource
def _load_generator(name: str):
    """Load the seq2seq model and tokenizer once per process.

    Cached with st.cache_resource so Streamlit reruns (every widget
    interaction) do not re-download / re-instantiate the model.
    """
    mdl = T5ForConditionalGeneration.from_pretrained(name)
    tok = T5Tokenizer.from_pretrained(name)
    return mdl, tok


@st.cache_resource
def _load_embedder():
    """Load the sentence-transformer used for RAG retrieval (cached)."""
    return SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')


# Module-level names kept identical to the original script so later code
# (the generate handler, the retrieval section) continues to work.
model, tokenizer = _load_generator(model_name)
embedder = _load_embedder()

# Build FAISS index (assuming you have precomputed embeddings for your retrieval corpus)
# embeddings = ...  # Your precomputed embeddings go here
# faiss_index = faiss.IndexFlatL2(embeddings.shape[1])
# faiss_index.add(embeddings)

# Define the Streamlit interface
st.title("Humanized Text Generator")

# Text input from the user
user_input = st.text_area("Enter your query here", max_chars=2000)
# Button to generate text
if st.button("Generate Humanized Text"):
    if user_input:
        # Convert user input to an embedding for retrieval
        query_embedding = embedder.encode([user_input], convert_to_tensor=True)

        # Retrieve the top k related documents from your FAISS index
        # _, top_k_indices = faiss_index.search(query_embedding.cpu().numpy(), k=5)

        # Dummy document context (replace this with actual retrieved docs)
        # context = retrieve_documents(top_k_indices)
        context = "Sample context related to the query."  # For demonstration

        # Concatenate query and context
        input_text = f"{user_input} {context}"

        # Tokenize input and generate output.
        # Pass the full tokenizer output (**inputs) so generate() receives
        # attention_mask as well — the original passed only input_ids, which
        # lets the model attend to padding tokens.
        inputs = tokenizer(input_text, return_tensors="pt", truncation=True, max_length=512)
        outputs = model.generate(**inputs, max_length=2000, num_return_sequences=1)

        # Decode the generated text
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Display the generated text
        st.write(generated_text)
    else:
        st.write("Please enter a query.")
import faiss
import numpy as np

# Load your corpus embeddings.
# NOTE(review): the original built the index directly from an undefined
# `embeddings` name (the np.load line was commented out), which raised
# NameError at import time. The load is now guarded so the app still starts
# when no precomputed embeddings file is present.
try:
    embeddings = np.load("embeddings.npy")
except (FileNotFoundError, OSError):
    embeddings = None

# Initialize FAISS index and add the embeddings (L2 distance).
faiss_index = None
if embeddings is not None:
    faiss_index = faiss.IndexFlatL2(embeddings.shape[1])
    faiss_index.add(embeddings)

# When you have a query, encode it and retrieve the top documents.
# Guarded: only meaningful when an index exists and the user typed something.
if faiss_index is not None and user_input:
    query_embedding = embedder.encode([user_input], convert_to_tensor=True)
    _, top_k_indices = faiss_index.search(query_embedding.cpu().numpy(), k=5)
# Retrieve documents based on the top_k_indices
def retrieve_documents(top_k_indices):
    """Map FAISS search indices to corpus documents and join them.

    Parameters
    ----------
    top_k_indices : 2-D sequence of int
        Search result indices as returned by ``faiss_index.search`` (one row
        per query); only the first row is used.

    Returns
    -------
    str
        The selected documents joined by single spaces.

    Placeholder implementation: in a real system these indices would look up
    the actual documents of the retrieval corpus.
    """
    documents = ["Doc 1", "Doc 2", "Doc 3", "Doc 4", "Doc 5"]
    selected = []
    for idx in top_k_indices[0]:
        selected.append(documents[idx])
    return " ".join(selected)