Spaces:

louiecerv
/

hf_hub_api_demo

Sleeping

louiecerv committed on Jan 25, 2025

Commit

fa189e6

1 Parent(s): db45b4c

sync with remote

Files changed (5) hide show

app.py ADDED Viewed

+import streamlit as st
+from transformers import pipeline
+import torch
+# Force CPU usage
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model_name = "louiecerv/sentiment_analysis_model"
+classifier = pipeline("text-classification", model=model_name, tokenizer=model_name, device=0 if device == "cuda" else -1)
+print(f"Using device: {device}")
+# Streamlit UI
+st.title("Sentiment Analysis App")
+st.write("Enter a movie review and get its sentiment.")
+user_input = st.text_area("Enter review:")
+if st.button("Analyze"):
+    if user_input:
+        prediction = classifier(user_input)
+        label = prediction[0]['label']
+        confidence = prediction[0]['score']
+        st.write(f"### Sentiment: {label}")
+        st.write(f"Confidence: {confidence:.2f}")
+    else:
+        st.warning("Please enter a review.")

dataset.py ADDED Viewed

+from datasets import Dataset, DatasetDict
+from huggingface_hub import HfApi
+# Create the dataset
+data = [
+    {"text": "I loved this movie! It was fantastic!", "label": 1},
+    {"text": "Terrible film. Would not recommend.", "label": 0},
+    {"text": "Amazing cinematography, but the plot was weak.", "label": 1},
+    {"text": "I fell asleep halfway through. Very boring.", "label": 0}
+]
+dataset = Dataset.from_list(data)
+# Push dataset to Hugging Face
+dataset.push_to_hub("louiecerv/sentiment_analysis")

requirements.txt ADDED Viewed

+transformers
+datasets
+torch
+streamlit
+huggingface_hub

train_model.py ADDED Viewed

+from datasets import load_dataset
+from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer
+from transformers import AutoTokenizer
+import torch
+# Load the dataset
+dataset = load_dataset("louiecerv/sentiment_analysis")
+# Load tokenizer
+model_checkpoint = "distilbert-base-uncased"
+tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
+# Tokenize function
+def tokenize_function(examples):
+    return tokenizer(examples["text"], padding="max_length", truncation=True)
+tokenized_datasets = dataset.map(tokenize_function, batched=True)
+# Prepare dataset for training
+train_dataset = tokenized_datasets["train"]
+# Load model
+model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=2)
+# Training arguments
+training_args = TrainingArguments(
+    output_dir="./results",
+    eval_strategy="no",
+    per_device_train_batch_size=8,
+    per_device_eval_batch_size=8,
+    num_train_epochs=3,
+    save_strategy="epoch",
+    push_to_hub=True,
+    hub_model_id="louiecerv/sentiment_analysis_model"
+)
+# Trainer
+trainer = Trainer(
+    model=model,
+    args=training_args,
+    train_dataset=train_dataset
+)
+# Train and save model
+trainer.train()
+trainer.push_to_hub()

upload_tokenizer.py ADDED Viewed

+from transformers import AutoTokenizer
+model_checkpoint = "distilbert-base-uncased"
+tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
+# Push tokenizer to the model repo
+tokenizer.push_to_hub("louiecerv/sentiment_analysis_model")