# ashabot / app.py
# Author: roshcheeku — "Create app.py" (commit 98e78d3, verified)
import os
import base64
import json
from flask import Flask, request, jsonify
from flask_cors import CORS
import spacy
from textblob import TextBlob
import re
import tempfile
import PyPDF2
import docx
import pyttsx3
import threading
import logging
from werkzeug.utils import secure_filename
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
# Initialize Flask app
app = Flask(__name__)
CORS(app) # Enable CORS for all routes
# Configure environment: uploads are written under ./uploads relative to the
# process working directory.
UPLOAD_FOLDER = os.path.join(os.getcwd(), 'uploads')
if not os.path.exists(UPLOAD_FOLDER):
    os.makedirs(UPLOAD_FOLDER)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config['MAX_CONTENT_LENGTH'] = 20 * 1024 * 1024 # 20MB max upload
# Set up Hugging Face model parameters (overridable via the HF_MODEL env var)
HF_MODEL = os.environ.get('HF_MODEL', "mistralai/Mistral-7B-Instruct-v0.2")
logger.info(f"Using Hugging Face model: {HF_MODEL}")
# Dictionary to store chat sessions, keyed by session_id.
# NOTE(review): in-memory only — sessions are lost on restart and are not
# shared across multiple workers/processes.
chat_sessions = {}
# Load spaCy model, degrading gracefully so the app can still start when no
# model package is installed.
try:
    nlp = spacy.load("en_core_web_sm")
    logger.info("Successfully loaded spaCy model")
except Exception as e:
    logger.error(f"Failed to load spaCy model: {str(e)}")
    # Fallback to a larger model if that one happens to be available
    try:
        nlp = spacy.load("en_core_web_md")
        logger.info("Loaded fallback spaCy model")
    except Exception:  # fix: was a bare `except:` (also caught SystemExit/KeyboardInterrupt)
        logger.error("Could not load any spaCy model")

        # Define empty nlp function as fallback: mimics the spaCy call shape
        # but yields no noun chunks, so bias detection becomes a no-op.
        def nlp(text):
            class MockDoc:
                def __init__(self, text):
                    self.text = text
                    self.noun_chunks = []
            return MockDoc(text)
# Initialize text-to-speech engine.
# NOTE(review): `engine` is initialized here but never used in the visible
# endpoints — presumably consumed elsewhere or reserved for a future feature.
try:
    engine = pyttsx3.init()
    logger.info("Text-to-speech engine initialized")
except Exception as e:
    logger.error(f"Failed to initialize text-to-speech: {str(e)}")
    # Fall back to None so the rest of the app starts without TTS support.
    engine = None
def load_hf_model():
    """Load the configured Hugging Face causal LM and wrap it in a
    text-generation pipeline.

    Returns:
        The transformers text-generation pipeline, or None when loading
        fails (callers treat None as "model unavailable").
    """
    try:
        logger.info(f"Loading model: {HF_MODEL}")
        tok = AutoTokenizer.from_pretrained(HF_MODEL)
        lm = AutoModelForCausalLM.from_pretrained(HF_MODEL)
        text_gen = pipeline("text-generation", model=lm, tokenizer=tok)
        logger.info("Successfully loaded model and tokenizer")
    except Exception as exc:
        logger.error(f"Error loading model: {str(exc)}")
        return None
    return text_gen


# Load model once at startup; endpoints report unavailability when this is None.
generator = load_hf_model()
# Bias detection patterns and empowering messages.
# Keys are bias-topic phrases matched word-by-word against user input by
# detect_gender_bias(); values are the empowering replies returned verbatim.
bias_patterns = {
    "suitability for leadership": "Absolutely! Women have led globally—in government, business, and science.",
    "emotional stability": "Emotional intelligence is a leadership asset for everyone.",
    "tech ability": "Women are innovators in tech—from Ada Lovelace to today's pioneers.",
    "logical thinking": "Logic is a human ability, not gender-specific.",
    "career vs family": "Many women successfully balance career and family. Stereotypes don't define reality.",
    "aggressiveness in women": "Assertiveness is a leadership strength for all genders.",
    "women in STEM": "Women have been crucial in STEM fields, past and present.",
    "women in politics": "Women have led nations and made major political impacts globally.",
    "women's emotional nature": "Emotions are part of being human and a leadership strength.",
    "women's competence in business": "Women are highly competent business leaders and entrepreneurs.",
    "women's role in history": "Women have made monumental contributions across history."
}
def suggest_reframing(pattern):
    """Return an inclusive rephrasing tip for a detected bias pattern.

    Falls back to a generic suggestion when the pattern is unknown.
    """
    tips = {
        "suitability for leadership": "Ask about leadership qualities in all individuals.",
        "emotional stability": "Focus on emotional intelligence across all leaders.",
        "tech ability": "Highlight tech expertise without linking to gender.",
        "logical thinking": "Emphasize logical thinking as a universal human trait.",
        "career vs family": "Discuss career and family balance inclusively.",
        "aggressiveness in women": "Celebrate assertiveness for all genders.",
        "women in STEM": "Celebrate contributions of everyone in STEM.",
        "women in politics": "Recognize political leadership without assumptions.",
        "women's emotional nature": "Focus on emotional intelligence as a human strength.",
        "women's competence in business": "Highlight business leadership across all people.",
        "women's role in history": "Explore contributions from all genders.",
    }
    return tips.get(pattern, "Consider rephrasing to be more inclusive.")
def analyze_sentiment(text):
    """Classify text as 'positive', 'negative', or 'neutral' from TextBlob polarity.

    A dead band of ±0.1 around zero maps to 'neutral'.
    """
    polarity = TextBlob(text).sentiment.polarity
    if polarity < -0.1:
        return "negative"
    return "positive" if polarity > 0.1 else "neutral"
def detect_gender_bias(text):
    """Scan a message for gender-biased framings about women.

    Returns an empowering counter-message plus a reframing suggestion when a
    bias pattern is detected, otherwise None.
    """
    lowered = text.lower()
    doc = nlp(lowered)
    # Only examine messages whose noun chunks actually mention women.
    if not any("women" in chunk.text for chunk in doc.noun_chunks):
        return None
    for pattern, message in bias_patterns.items():
        # Fix: the original joined the pattern's words with '|', so any single
        # word — including fillers like "for", "in", or "vs" — triggered a
        # match. Require every word of the pattern to appear instead.
        if all(re.search(r'\b' + re.escape(word) + r'\b', lowered)
               for word in pattern.split()):
            suggestion = suggest_reframing(pattern)
            return (
                f"{bias_patterns[pattern]}\n\n"
                "🛠️ Suggestion: " + suggestion
            )
    return None
# File handling functions
def extract_text_from_pdf(file_path):
    """Extract text from PDF files.

    Returns the concatenated text of all pages, or an error string when the
    file cannot be read (callers check for the "Error" prefix).
    """
    try:
        with open(file_path, 'rb') as handle:
            reader = PyPDF2.PdfReader(handle)
            return "".join(page.extract_text() for page in reader.pages)
    except Exception as e:
        logger.error(f"Error reading PDF: {str(e)}")
        return f"Error reading PDF: {str(e)}"
def extract_text_from_docx(file_path):
    """Extract text from DOCX files.

    Returns paragraph texts joined by newlines, or an error string when the
    file cannot be read (callers check for the "Error" prefix).
    """
    try:
        document = docx.Document(file_path)
        return "\n".join(para.text for para in document.paragraphs)
    except Exception as e:
        logger.error(f"Error reading DOCX: {str(e)}")
        return f"Error reading DOCX: {str(e)}"
def process_file(file_path, file_type):
    """Process different file types and extract text.

    Dispatches on the (lowercased) file_type string; unsupported or
    placeholder types return descriptive messages rather than raising.
    """
    if not os.path.exists(file_path):
        return f"File not found: {file_path}"
    ext = file_type.lower()
    if 'pdf' in ext:
        return extract_text_from_pdf(file_path)
    if ext in ('doc', 'docx'):
        return extract_text_from_docx(file_path)
    if ext in ('txt', 'text'):
        try:
            with open(file_path, 'r', encoding='utf-8') as handle:
                return handle.read()
        except Exception as e:
            logger.error(f"Error reading text file: {str(e)}")
            return f"Error reading text file: {str(e)}"
    if ext in ('xls', 'xlsx'):
        # Placeholder for Excel files - consider integrating pandas for actual processing
        return "Excel file detected. Specific content analysis currently limited."
    if ext in ('jpg', 'jpeg', 'png'):
        # Placeholder for image files - consider adding OCR
        return "Image file detected. OCR processing would occur here."
    return f"Processing for {ext} files is not supported."
def save_base64_file(base64_string, filename, file_type):
    """Save a base64 encoded file to disk.

    The filename is sanitized with secure_filename before writing into the
    configured upload folder. Returns the saved path, or None on failure.
    """
    try:
        raw = base64.b64decode(base64_string)
        destination = os.path.join(app.config['UPLOAD_FOLDER'], secure_filename(filename))
        with open(destination, 'wb') as out:
            out.write(raw)
    except Exception as e:
        logger.error(f"Error saving file: {str(e)}")
        return None
    return destination
def get_or_create_chat_session(session_id):
    """Create a new chat session or return an existing one.

    New sessions are seeded with a system-style instruction message and an
    assistant greeting so the model has initial context.
    """
    session = chat_sessions.get(session_id)
    if session is None:
        logger.info(f"Creating new chat session: {session_id}")
        instruction = (
            "You are Ashabot, an ethical AI chatbot. Always respond respectfully and avoid engaging in gender-biased or discriminatory content. "
            "If such content is detected, respond with educational, inclusive, and fact-based replies. "
            "You can understand document content and respond to various file types including PDFs, documents, and images."
        )
        greeting = (
            "I am Ashabot, an ethical AI chatbot. I'm here to assist you with information and responses that are respectful and inclusive. "
            "I can help analyze document content and respond to various file types. How can I assist you today?"
        )
        session = {
            "history": [
                {"role": "user", "content": instruction},
                {"role": "assistant", "content": greeting},
            ]
        }
        chat_sessions[session_id] = session
    return session
def generate_suggestions(response_text):
    """Generate follow-up suggestions based on the response.

    Uses simple keyword heuristics; pads with generic suggestions when fewer
    than two topic-specific ones match. Always returns at most two items.
    """
    lowered = response_text.lower()
    followups = []
    if "leadership" in lowered:
        followups.append("Tell me more about leadership qualities")
    # Note: "STEM" is matched case-sensitively on purpose (acronym).
    if "STEM" in response_text or "science" in lowered:
        followups.append("How can we encourage more diversity in STEM?")
    if "career" in lowered:
        followups.append("What career opportunities align with my skills?")
    if len(followups) < 2:
        followups += [
            "How can I learn more about this topic?",
            "Could you provide some resources on this subject?",
        ]
    return followups[:2]
def generate_opportunities(text, opportunities_data=None):
    """Generate potential opportunities based on user input and profile data.

    Matches the user's profile skills/interests against the message with a
    simple case-insensitive substring test; falls back to one generic
    learning resource when nothing matches.
    """
    lowered = text.lower()
    matched = []
    if opportunities_data:
        skills = opportunities_data.get('skills', [])
        interests = opportunities_data.get('interests', [])
        # Simple matching algorithm - in production this would be more sophisticated
        if any(skill.lower() in lowered for skill in skills):
            matched.append({
                "title": "Skill Development Opportunity",
                "description": "Based on your skills, consider enhancing your expertise in this area.",
                "url": "https://example.com/skill-development",
            })
        if any(interest.lower() in lowered for interest in interests):
            matched.append({
                "title": "Interest-Based Opportunity",
                "description": "This aligns with your interests. Explore more in this field.",
                "url": "https://example.com/explore-interests",
            })
    if not matched:
        matched.append({
            "title": "Learning Resource",
            "description": "Explore more about this topic through our learning platform",
            "url": "https://example.com/learn-more",
        })
    return matched
def generate_response_with_hf(prompt, chat_history=None):
    """Generate a reply using the Hugging Face text-generation pipeline.

    The chat history is flattened into a "User:/Assistant:" transcript, the
    model continues it, and everything after the final "Assistant:" marker is
    returned. Errors are reported as user-facing strings, never raised.
    """
    if generator is None:
        return "Model not available. Please check server logs."
    try:
        # Flatten the conversation history into the prompt transcript.
        parts = []
        for message in (chat_history or []):
            role = message.get("role", "")
            content = message.get("content", "")
            if role == "user":
                parts.append(f"User: {content}\n")
            elif role == "assistant":
                parts.append(f"Assistant: {content}\n")
        parts.append(f"User: {prompt}\nAssistant:")
        formatted_prompt = "".join(parts)
        # Sampled generation; max_length bounds prompt + completion tokens.
        outputs = generator(
            formatted_prompt,
            max_length=1024,
            num_return_sequences=1,
            temperature=0.7,
            top_p=0.9,
            do_sample=True
        )
        # Keep only the text after the last "Assistant:" marker.
        reply = outputs[0]['generated_text'].split("Assistant:")[-1].strip()
        if not reply:
            reply = "I apologize, but I couldn't generate a response. Please try rephrasing your question."
        return reply
    except Exception as e:
        logger.error(f"Error generating response: {str(e)}")
        return f"An error occurred while generating a response: {str(e)}"
@app.route('/api/chat', methods=['POST'])
def chat():
    """Main endpoint for chat functionality.

    Expects a JSON body with: session_id (required), message, has_files,
    files (list of {file_name, file_data (base64), file_type}), and
    opportunities_data ({skills, interests}).

    Returns JSON with 'response', 'suggestions', and 'opportunities'.
    Biased messages short-circuit with an educational reply; otherwise any
    attached files are decoded, their text appended to the prompt, and the
    HF model generates the answer.
    """
    try:
        # Parse request data.
        # NOTE(review): request.json is None for non-JSON bodies — the
        # .get() below would then raise and be caught by the outer handler
        # as a 500 rather than a 400.
        data = request.json
        session_id = data.get('session_id')
        user_message = data.get('message', '')
        has_files = data.get('has_files', False)
        files = data.get('files', [])
        opportunities_data = data.get('opportunities_data', {})
        if not session_id:
            return jsonify({'error': 'Session ID is required'}), 400
        logger.info(f"Received request for session {session_id}, has_files: {has_files}")
        # Get (or lazily create) the per-session history.
        chat_session = get_or_create_chat_session(session_id)
        # Analyze sentiment (logged only; not used in the response).
        sentiment = analyze_sentiment(user_message)
        logger.info(f"Message sentiment: {sentiment}")
        # Check for gender bias; a hit short-circuits model generation.
        bias_warning = detect_gender_bias(user_message)
        if bias_warning:
            logger.info("Gender bias detected")
            response_text = f"I noticed some gender bias in your message. {bias_warning}\n\nLet's continue the conversation inclusively! 🌟"
            # Add messages to history
            chat_session["history"].append({"role": "user", "content": user_message})
            chat_session["history"].append({"role": "assistant", "content": response_text})
            return jsonify({
                'response': response_text,
                'suggestions': generate_suggestions(response_text),
                'opportunities': []
            })
        # Process files if present: decode, save, extract text.
        file_contents = []
        if has_files and files:
            for file_info in files:
                file_name = file_info.get('file_name')
                file_data = file_info.get('file_data')
                file_type = file_info.get('file_type')
                if file_name and file_data:
                    # Save base64 payload to disk under the upload folder.
                    file_path = save_base64_file(file_data, file_name, file_type)
                    if file_path:
                        # Process file based on type; extractors signal
                        # failure via "Error..."/"Processing for..." strings.
                        file_content = process_file(file_path, file_type)
                        if not file_content.startswith("Error") and not file_content.startswith("Processing for"):
                            file_contents.append(f"Content from {file_name}: {file_content[:5000]}")  # Limit to 5000 chars per file
                            # Add message about successfully processed file
                            logger.info(f"Successfully processed file: {file_name}")
                        else:
                            logger.warning(f"Issue processing file: {file_content}")
                    else:
                        logger.error(f"Failed to save file: {file_name}")
        # Construct complete message with both user text and file contents.
        complete_message = user_message
        if file_contents:
            complete_message += "\n\nAttached files content:\n" + "\n\n".join(file_contents)
        # Add user message to history before generating, so the model sees it.
        chat_session["history"].append({"role": "user", "content": complete_message})
        # Generate response with HF model.
        try:
            response_text = generate_response_with_hf(complete_message, chat_session["history"])
            # Add assistant response to history.
            chat_session["history"].append({"role": "assistant", "content": response_text})
            # Keep history at a reasonable size: the 2 seed messages plus
            # the last 10 user/assistant turns.
            if len(chat_session["history"]) > 12:
                chat_session["history"] = chat_session["history"][:2] + chat_session["history"][-10:]
            # Generate suggestions based on response.
            suggestions = generate_suggestions(response_text)
            # Generate opportunities based on user message and profile.
            opportunities = generate_opportunities(complete_message, opportunities_data)
            return jsonify({
                'response': response_text,
                'suggestions': suggestions,
                'opportunities': opportunities
            })
        except Exception as e:
            logger.error(f"Error generating response: {str(e)}")
            return jsonify({
                'error': f"Error generating response: {str(e)}",
                'suggestions': ["Could you try rephrasing your question?", "Let's try a different topic"],
                'opportunities': []
            }), 500
    except Exception as e:
        # Catch-all boundary: report any unexpected failure as a 500.
        logger.error(f"Error processing request: {str(e)}")
        return jsonify({'error': str(e)}), 500
@app.route('/api/health', methods=['GET'])
def health_check():
    """Health check endpoint: reports service status and the configured model."""
    payload = {
        'status': 'ok',
        'service': 'Ashabot API',
        'model': HF_MODEL
    }
    return jsonify(payload)
@app.route('/', methods=['GET'])
def index():
    """Root endpoint with API documentation."""
    endpoint_docs = {
        '/api/chat': 'POST - Send messages and files for processing',
        '/api/health': 'GET - Health check'
    }
    return jsonify({
        'service': 'Ashabot API',
        'version': '1.0.0',
        'model': HF_MODEL,
        'endpoints': endpoint_docs,
        'documentation': 'See README.md for full API documentation'
    })
if __name__ == '__main__':
    # Honor a platform-assigned PORT (e.g. on a hosting service); default 5000.
    listen_port = int(os.environ.get('PORT', 5000))
    # debug=False for production
    app.run(host='0.0.0.0', port=listen_port, debug=False)