Spaces:

houmanrajabi
/

CoinPulse

Sleeping

App Files Files Community

CoinPulse / app.py

houmanrajabi

Create app.py

a0247f0 verified 5 months ago

raw

history blame contribute delete

8.77 kB

	import gradio as gr
	import torch
	from transformers import AutoTokenizer, AutoModelForSequenceClassification
	from datasets import Dataset, load_dataset
	from huggingface_hub import HfApi
	import os
	from datetime import datetime

	# Hub checkpoint that supplies both the tokenizer and the classifier weights
	MODEL_PATH = "houmanrajabi/CoinPulse"
	tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
	model = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH)

	# Inference only: put the network in evaluation mode
	model.eval()

	# Class index -> sentiment label (hard-coded here; the original note says
	# it follows the model config -- confirm against the checkpoint if labels change)
	id2label = dict(enumerate(("negative", "neutral", "positive")))

	# Flagged samples are pushed to this Hub dataset when HF_TOKEN is set
	HF_TOKEN = os.environ.get("HF_TOKEN")
	DATASET_REPO = "houmanrajabi/coinpulse-flagged-data"

	def predict_sentiment(text, temperature=2.0):
	    """Classify the sentiment of ``text`` and return per-label probabilities.

	    Args:
	        text: Raw text to classify.
	        temperature: Positive softmax temperature. The logits are divided by
	            it before the softmax, so larger values flatten the distribution.
	            ``None`` (e.g. a cleared number field) falls back to 2.0.

	    Returns:
	        A dict mapping every label in ``id2label`` to its probability (float).

	    Raises:
	        gr.Error: If ``text`` is empty or whitespace-only, or if
	            ``temperature`` is not a number greater than zero.
	    """
	    # Report invalid input through gr.Error: a dict with a string value
	    # (the previous {"error": ...} return) is not a valid label->confidence
	    # mapping for the gr.Label output this function feeds.
	    if not text or not text.strip():
	        raise gr.Error("Please enter some text")

	    if temperature is None:
	        temperature = 2.0
	    try:
	        temperature = float(temperature)
	    except (TypeError, ValueError):
	        raise gr.Error("Temperature must be a number greater than 0")
	    if temperature <= 0:
	        # Dividing by zero or a negative value would give NaN/inverted scores
	        raise gr.Error("Temperature must be a number greater than 0")

	    # Tokenize input with extended vocabulary; input is truncated to 512 tokens
	    inputs = tokenizer(
	        text,
	        return_tensors="pt",
	        truncation=True,
	        max_length=512,
	        padding=True,
	    )

	    # Forward pass without gradient tracking, then temperature-scaled softmax
	    with torch.no_grad():
	        logits = model(**inputs).logits / temperature
	        probs = torch.softmax(logits, dim=1)[0]

	    # One entry per known class instead of hard-coded indices 0..2
	    return {label: float(probs[index]) for index, label in id2label.items()}

	# Column order shared by the Hub dataset and the CSV fallback
	_FLAG_COLUMNS = (
	    "text", "temperature", "prediction",
	    "sentiment_label", "issue_type", "flag_combined", "timestamp",
	)


	def _combine_flags(sentiment_label, issue_type):
	    """Join the selected flag values with " + "; return "flagged" if none is set."""
	    selected = [flag for flag in (sentiment_label, issue_type) if flag]
	    return " + ".join(selected) if selected else "flagged"


	def _push_flag_to_hub(text_input, temperature_input, prediction_output,
	                      sentiment_label, issue_type, flag_combined, timestamp):
	    """Append one flagged sample to the Hub dataset and push the full table back."""
	    try:
	        data_dict = load_dataset(DATASET_REPO, split="train", token=HF_TOKEN).to_dict()
	    except FileNotFoundError as e:
	        # Only a missing dataset justifies starting from an empty table.
	        # Any other failure (network, auth, ...) must propagate: continuing
	        # with an empty table would overwrite existing flags on push.
	        # NOTE(review): relies on `datasets` raising FileNotFoundError
	        # subclasses for a missing/empty repo -- confirm for the pinned version.
	        print(f"Creating new dataset error: {e}")
	        data_dict = {column: [] for column in _FLAG_COLUMNS}

	    data_dict["text"].append(str(text_input))
	    data_dict["temperature"].append(float(temperature_input))
	    data_dict["prediction"].append(str(prediction_output))
	    data_dict["sentiment_label"].append(str(sentiment_label) if sentiment_label else "")
	    data_dict["issue_type"].append(str(issue_type) if issue_type else "")
	    data_dict["flag_combined"].append(flag_combined)
	    data_dict["timestamp"].append(timestamp)

	    Dataset.from_dict(data_dict).push_to_hub(
	        DATASET_REPO,
	        token=HF_TOKEN,
	        private=True,
	    )


	def _append_flag_to_csv(row):
	    """Append ``row`` to flagged_data/flags.csv, writing the header for a new file."""
	    import csv

	    os.makedirs("flagged_data", exist_ok=True)
	    csv_path = "flagged_data/flags.csv"
	    needs_header = not os.path.exists(csv_path)

	    with open(csv_path, "a", newline="", encoding="utf-8") as f:
	        writer = csv.writer(f)
	        if needs_header:
	            writer.writerow(_FLAG_COLUMNS)
	        writer.writerow(row)


	def save_flagged_data(text_input, temperature_input, prediction_output,
	                      sentiment_label, issue_type):
	    """Save flagged data to HF Dataset or CSV.

	    When ``HF_TOKEN`` is set the sample is appended to ``DATASET_REPO``; if
	    that fails for any reason (or no token is configured) the sample is
	    appended to a local CSV file instead.

	    Args:
	        text_input: Text that was analysed.
	        temperature_input: Temperature value shown in the UI.
	        prediction_output: Current value of the prediction component.
	        sentiment_label: Sentiment the user considers correct, or None.
	        issue_type: Issue category chosen by the user, or None.

	    Returns:
	        A status message describing where the flag was stored, or an
	        "Error saving flag: ..." message if even the CSV fallback failed.
	        Exceptions are reported in the message rather than raised.
	    """
	    try:
	        flag_combined = _combine_flags(sentiment_label, issue_type)
	        # Single timestamp so the Hub attempt and the CSV fallback agree
	        timestamp = datetime.now().isoformat()

	        if HF_TOKEN:
	            try:
	                _push_flag_to_hub(
	                    text_input, temperature_input, prediction_output,
	                    sentiment_label, issue_type, flag_combined, timestamp,
	                )
	                return f"Successfully flagged as: {flag_combined}"
	            except Exception as e:
	                # Best-effort: report and fall through to the CSV fallback
	                print(f"HF Dataset error: {e}")

	        _append_flag_to_csv([
	            text_input, temperature_input, prediction_output,
	            sentiment_label if sentiment_label else "",
	            issue_type if issue_type else "",
	            flag_combined,
	            timestamp,
	        ])
	        return f"Flagged as: {flag_combined} (saved to CSV)"

	    except Exception as e:
	        return f"Error saving flag: {str(e)}"

	# Sample inputs offered in the UI; each row holds a single value (the input text)
	examples = [
	    [headline]
	    for headline in (
	        "Bitcoin reaches new all-time high amid institutional adoption",
	        "Major cryptocurrency exchange faces security breach, users advised to withdraw funds",
	        "Ethereum network processes steady transaction volume with no significant changes",
	        "Solana announces new partnership with leading DeFi protocol",
	        "NFT market sees declining volumes as collectors wait for next bull run",
	        "Cardano's latest upgrade brings enhanced smart contract capabilities",
	        "Dogecoin community rallies behind charitable initiatives",
	        "PancakeSwap introduces new liquidity mining incentives",
	        "Stock prices plummeted after the disappointing earnings report.",
	        "The quarterly results were in line with market forecasts.",
	    )
	]

	# Create custom Gradio interface with Blocks for multi-select flagging.
	# Layout: intro text, input/prediction row, flagging controls, examples,
	# then the event wiring for the two buttons.
	with gr.Blocks(theme=gr.themes.Soft(), title="CoinPulse Sentiment Analysis") as demo:

	    gr.Markdown("""
	# 🪙 CoinPulse: Cryptocurrency Sentiment Analysis

	This model analyzes sentiment in cryptocurrency-related text using a fine-tuned FinBERT model
	with an extended tokenizer vocabulary.

	Key Features:
	- Extended Vocabulary: 520 crypto-specific tokens added (e.g., bitcoin, ethereum, defi, nft, solana)
	- Total Vocabulary Size: 31,024 tokens
	- Base Model: ProsusAI/finbert
	- Fine-tuned: On cryptocurrency news and social media data
	- Classification: Positive, Negative, Neutral

	Performance:
	- Test Accuracy: 93.00%
	- Test F1 (weighted): 92.53%
	- Best Validation F1: 92.86%
	""")

	    with gr.Row():
	        with gr.Column(scale=2):
	            text_input = gr.Textbox(
	                lines=5,
	                placeholder="Enter cryptocurrency-related text here...",
	                label="Input Text"
	            )
	            temperature_input = gr.Number(
	                label="Temperature",
	                value=2.0,
	                precision=1,
	                step=0.1,
	                minimum=0.1,
	                maximum=10.0
	            )
	            predict_btn = gr.Button("Analyze Sentiment", variant="primary")

	        with gr.Column(scale=1):
	            prediction_output = gr.Label(num_top_classes=3, label="Sentiment Prediction")

	    gr.Markdown("---")
	    gr.Markdown("""
	### 📌 Help Improve This Model

	If the prediction is incorrect or interesting, you can flag it:
	- Correct Sentiment: What should the sentiment be?
	- Issue Type: What's wrong or interesting about this prediction?

	You can select one, both, or neither.

	Don't forget to submit your selection by clicking 🚩 Flag This Prediction
	""")

	    with gr.Row():
	        sentiment_flag = gr.Radio(
	            choices=["positive", "negative", "neutral"],
	            label="Correct Sentiment",
	            value=None
	        )
	        issue_flag = gr.Radio(
	            choices=["incorrect prediction", "offensive content", "interesting case", "edge case"],
	            label="Issue Type",
	            value=None
	        )

	    with gr.Row():
	        flag_btn = gr.Button("🚩 Flag This Prediction", variant="secondary")
	        flag_status = gr.Textbox(label="Flag Status", interactive=False)

	    gr.Markdown("---")
	    gr.Markdown("### 💡 Try These Examples:")

	    # Examples only fill the text box; they are not cached or pre-computed
	    gr.Examples(
	        examples=examples,
	        inputs=[text_input],
	        outputs=prediction_output,
	        fn=predict_sentiment,
	        cache_examples=False
	    )

	    # Connect the predict button. The temperature field is passed along so
	    # the control actually affects the prediction and the value stored with
	    # a flagged sample matches the one used to produce it.
	    predict_btn.click(
	        fn=predict_sentiment,
	        inputs=[text_input, temperature_input],
	        outputs=prediction_output
	    )

	    # Connect the flag button
	    flag_btn.click(
	        fn=save_flagged_data,
	        inputs=[text_input, temperature_input, prediction_output, sentiment_flag, issue_flag],
	        outputs=flag_status
	    )

	# Script entry point: announce where flags will be stored, then start the app
	if __name__ == "__main__":
	    print("Starting CoinPulse Sentiment Analysis...")
	    storage_note = (
	        f"Flagged data will be saved to: {DATASET_REPO}"
	        if HF_TOKEN
	        else "Flagged data will be saved locally to: flagged_data/"
	    )
	    print(storage_note)

	    demo.launch()