CryptoScoutv1 committed on
Commit
de173bf
·
1 Parent(s): be999dd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -10
app.py CHANGED
@@ -1,23 +1,36 @@
1
- from transformers import GPT2LMHeadModel, GPT2Tokenizer
2
  from datasets import load_dataset
3
  import gradio as gr
4
 
5
- # Load the dataset
6
- dataset = load_dataset("luisotorres/wikipedia-crypto-articles", split='train') # Adjust split as needed
 
 
 
7
 
8
- # Initialize tokenizer and model for GPT-2 text generation
9
- tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
10
- model = GPT2LMHeadModel.from_pretrained('gpt2')
 
 
 
11
 
12
- # Function to generate text
 
13
  def generate_text(prompt):
 
14
  encoded_input = tokenizer(prompt, return_tensors='pt')
15
- max_length = len(encoded_input["input_ids"][0]) + 50 # Example max length
 
 
16
  output_sequences = model.generate(input_ids=encoded_input["input_ids"], max_length=max_length)
 
 
17
  return tokenizer.decode(output_sequences[0], skip_special_tokens=True)
18
 
19
- # Creating a Gradio interface
 
20
  iface = gr.Interface(fn=generate_text, inputs="text", outputs="text")
21
 
22
- # Launching the interface
23
  iface.launch()
 
1
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset
import gradio as gr

# --- Dataset Loading ---
# NOTE(review): `dataset` is loaded here but never referenced anywhere else
# in this file — confirm it is still needed, since the download slows startup.
dataset_name = "luisotorres/wikipedia-crypto-articles"
dataset_split = 'train'
dataset = load_dataset(dataset_name, split=dataset_split)

# --- Model and Tokenizer Setup ---
# Any causal-LM checkpoint from the Hugging Face Hub can be dropped in here.
model_id = "gpt2"  # e.g. "gpt2", "EleutherAI/gpt-neo-2.7B", ...
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
17
 
18
# --- Text Generation Function ---
def generate_text(prompt):
    """Generate a continuation of *prompt* with the module-level causal LM.

    Args:
        prompt: Input text to continue.

    Returns:
        The decoded generated sequence (prompt + up to 50 new tokens),
        with special tokens stripped.
    """
    # Tokenize the input prompt text.
    encoded_input = tokenizer(prompt, return_tensors='pt')

    # Allow up to 50 tokens beyond the prompt length. Adjust as needed.
    max_length = len(encoded_input["input_ids"][0]) + 50

    # Fix: forward the attention mask returned by the tokenizer. Without it,
    # transformers warns and may infer a wrong mask when padding is present.
    output_sequences = model.generate(
        input_ids=encoded_input["input_ids"],
        attention_mask=encoded_input["attention_mask"],
        max_length=max_length,
    )

    # Decode and return the generated text.
    return tokenizer.decode(output_sequences[0], skip_special_tokens=True)
30
 
31
# --- Gradio Interface Setup ---
# Minimal UI: one text box in, the generated continuation out.
iface = gr.Interface(
    fn=generate_text,
    inputs="text",
    outputs="text",
)

# --- Launch the Interface ---
iface.launch()