Spaces:

kinely
/

Humanized-AI-Text

Runtime error

App Files Files Community

kinely committed on Oct 16, 2024

Commit

4b8e1c7

verified ·

1 Parent(s): b0f9153

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -79

app.py CHANGED Viewed

@@ -1,96 +1,37 @@
-import streamlit as st
-from transformers import T5ForConditionalGeneration, T5Tokenizer
-from sentence_transformers import SentenceTransformer
-import faiss
-import torch
-import wikipediaapi
-# Initialize Wikipedia API with a custom user-agent
-wiki_wiki = wikipediaapi.Wikipedia(
-    language='en',
-    user_agent='HumanizedTextApp/1.0 (kinelyaydenseo19@gmail.com)'
-)
-# Function to fetch content from Wikipedia
-def fetch_wikipedia_articles(titles):
-    corpus = []
-    for title in titles:
-        page = wiki_wiki.page(title)
-        if page.exists():
-            corpus.append(page.text)
-        else:
-            st.write(f"Page for '{title}' does not exist.")
-    return corpus
-# Initialize SentenceTransformer for embeddings
-embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
-# Fetch and create the corpus
-titles = [
-    "Crypto",
-    "Finance",
-    "Technology",
-    "Healthcare",
-    "Education"
-]
-st.write("Fetching Wikipedia articles...")
-corpus = fetch_wikipedia_articles(titles)
-# Check if corpus is populated
-if not corpus:
-    st.write("No articles found. Please check the titles.")
-else:
-    st.write("Articles fetched successfully.")
-# Generate embeddings for the corpus
-st.write("Generating embeddings...")
-embeddings = embedder.encode(corpus, convert_to_tensor=True)
-embeddings_np = embeddings.cpu().numpy()
-# Initialize FAISS index and add embeddings
-faiss_index = faiss.IndexFlatL2(embeddings_np.shape[1])
-faiss_index.add(embeddings_np)
-# Load model and tokenizer
-model_name = "google/flan-t5-base"
-model = T5ForConditionalGeneration.from_pretrained(model_name)
-tokenizer = T5Tokenizer.from_pretrained(model_name)
 # Streamlit interface
-st.title("Humanized AI Text Generator")
 # Input from the user
-user_input = st.text_area("Enter your query here (e.g., about a country, concept, etc.)", height=200)
 if st.button("Generate Humanized Text"):
     if user_input.strip():
-        # Retrieve context from FAISS based on user input embedding
-        query_embedding = embedder.encode([user_input], convert_to_tensor=True)
-        _, top_k_indices = faiss_index.search(query_embedding.cpu().numpy(), k=5)
-        # Retrieve documents based on FAISS top_k_indices
-        def retrieve_documents(top_k_indices):
-            return " ".join([corpus[i] for i in top_k_indices[0]])
-        context = retrieve_documents(top_k_indices)
-        # Check if context is empty
-        if not context:
-            st.write("No relevant context found. Please try a different query.")
-        else:
-            # Concatenate user input and context for model input
-            input_text = f"{user_input} {context}"
-            # Tokenize input and handle truncation
-            inputs = tokenizer(input_text, return_tensors="pt", max_length=1024, truncation=True)
-            # Generate output
             outputs = model.generate(inputs.input_ids, max_length=2000, num_return_sequences=1)
-            # Decode the generated text
-            generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-            # Display the generated text
-            st.write(generated_text)
     else:
         st.write("Please enter a valid query.")

+from datasets import load_dataset
+# Load the BookCorpus74M dataset
+ds = load_dataset("raddwolf/BookCorpus74M")
+from transformers import AutoTokenizer, AutoModelForCausalLM
+# Load the tokenizer and model
+tokenizer = AutoTokenizer.from_pretrained("FabbriSimo01/Facebook_opt_1.3b_Quantized")
+model = AutoModelForCausalLM.from_pretrained("FabbriSimo01/Facebook_opt_1.3b_Quantized")
+import streamlit as st
+import torch
 # Streamlit interface
+st.title("Humanized Text Generation App")
 # Input from the user
+user_input = st.text_area("Enter your query here:", height=200)
 if st.button("Generate Humanized Text"):
     if user_input.strip():
+        # Prepare the input
+        inputs = tokenizer(user_input, return_tensors="pt", truncation=True, max_length=512)
+        # Generate output
+        with torch.no_grad():
             outputs = model.generate(inputs.input_ids, max_length=2000, num_return_sequences=1)
+        # Decode the generated text
+        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        # Display the generated text
+        st.write(generated_text)
     else:
         st.write("Please enter a valid query.")