Spaces:

roshcheeku
/

ashabot

Runtime error

App Files Files Community

roshcheeku committed on Apr 27, 2025

Commit

98e78d3

verified ·

1 Parent(s): 0aec656

Create app.py

Browse files

Files changed (1) hide show

app.py +463 -0

app.py ADDED Viewed

	@@ -0,0 +1,463 @@

+import os
+import base64
+import json
+from flask import Flask, request, jsonify
+from flask_cors import CORS
+import spacy
+from textblob import TextBlob
+import re
+import tempfile
+import PyPDF2
+import docx
+import pyttsx3
+import threading
+import logging
+from werkzeug.utils import secure_filename
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+# Configure logging: timestamped INFO-level records for the whole process.
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+# Initialize Flask app
+app = Flask(__name__)
+CORS(app)  # Enable CORS for all routes
+# Uploaded files are written below the current working directory.
+UPLOAD_FOLDER = os.path.join(os.getcwd(), 'uploads')
+# exist_ok avoids the check-then-create race when several workers start at once.
+os.makedirs(UPLOAD_FOLDER, exist_ok=True)
+app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
+# Cap on the whole request body (20MB). Files travel base64-encoded inside the
+# JSON payload, so the largest usable file is smaller than this figure.
+app.config['MAX_CONTENT_LENGTH'] = 20 * 1024 * 1024
+# Model id is overridable through the HF_MODEL environment variable.
+HF_MODEL = os.environ.get('HF_MODEL', "mistralai/Mistral-7B-Instruct-v0.2")
+logger.info(f"Using Hugging Face model: {HF_MODEL}")
+# In-memory chat sessions keyed by session id; contents are lost on restart.
+chat_sessions = {}
+# Load the spaCy pipeline whose noun chunks drive detect_gender_bias().
+try:
+    nlp = spacy.load("en_core_web_sm")
+    logger.info("Successfully loaded spaCy model")
+except Exception as e:
+    logger.error(f"Failed to load spaCy model: {str(e)}")
+    # Try the medium English model in case that is the one installed.
+    try:
+        nlp = spacy.load("en_core_web_md")
+        logger.info("Loaded fallback spaCy model")
+    except Exception:
+        # Catch Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
+        logger.error("Could not load any spaCy model")
+        # Last resort: a stand-in callable whose documents expose no noun chunks,
+        # which means bias detection never fires but the app still starts.
+        def nlp(text):
+            class MockDoc:
+                def __init__(self, text):
+                    self.text = text
+                    self.noun_chunks = []
+            return MockDoc(text)
+# Text-to-speech engine; stays None when pyttsx3 cannot start on this host.
+engine = None
+try:
+    engine = pyttsx3.init()
+    logger.info("Text-to-speech engine initialized")
+except Exception as tts_error:
+    logger.error("Failed to initialize text-to-speech: %s", str(tts_error))
+    engine = None
+# Hugging Face text-generation pipeline construction.
+def load_hf_model():
+    """Build a text-generation pipeline for HF_MODEL.
+    Returns:
+        The transformers pipeline, or None when the tokenizer or model
+        cannot be loaded (the failure is logged).
+    """
+    logger.info(f"Loading model: {HF_MODEL}")
+    try:
+        hf_tokenizer = AutoTokenizer.from_pretrained(HF_MODEL)
+        hf_model = AutoModelForCausalLM.from_pretrained(HF_MODEL)
+        text_pipeline = pipeline("text-generation", model=hf_model, tokenizer=hf_tokenizer)
+        logger.info("Successfully loaded model and tokenizer")
+    except Exception as load_error:
+        logger.error(f"Error loading model: {str(load_error)}")
+        return None
+    return text_pipeline
+# Load once at import time; every request shares this pipeline (None on failure).
+generator = load_hf_model()
+# Bias detection patterns and empowering messages.
+# Maps a bias topic name to the educational reply shown to the user.
+# The same topic names are the keys of the table in suggest_reframing().
+# detect_gender_bias() walks this dict in insertion order and returns on the
+# first topic that matches, so earlier entries take precedence.
+bias_patterns = {
+    "suitability for leadership": "Absolutely! Women have led globally—in government, business, and science.",
+    "emotional stability": "Emotional intelligence is a leadership asset for everyone.",
+    "tech ability": "Women are innovators in tech—from Ada Lovelace to today's pioneers.",
+    "logical thinking": "Logic is a human ability, not gender-specific.",
+    "career vs family": "Many women successfully balance career and family. Stereotypes don't define reality.",
+    "aggressiveness in women": "Assertiveness is a leadership strength for all genders.",
+    "women in STEM": "Women have been crucial in STEM fields, past and present.",
+    "women in politics": "Women have led nations and made major political impacts globally.",
+    "women's emotional nature": "Emotions are part of being human and a leadership strength.",
+    "women's competence in business": "Women are highly competent business leaders and entrepreneurs.",
+    "women's role in history": "Women have made monumental contributions across history."
+}
+# Reframing hints for biased questions
+def suggest_reframing(pattern):
+    """Return an inclusive rewording hint for a bias topic name.
+    Args:
+        pattern: One of the topic names used as keys in bias_patterns.
+    Returns:
+        The hint for that topic, or a generic hint for unknown names.
+    """
+    hints_by_topic = {
+        "suitability for leadership": "Ask about leadership qualities in all individuals.",
+        "emotional stability": "Focus on emotional intelligence across all leaders.",
+        "tech ability": "Highlight tech expertise without linking to gender.",
+        "logical thinking": "Emphasize logical thinking as a universal human trait.",
+        "career vs family": "Discuss career and family balance inclusively.",
+        "aggressiveness in women": "Celebrate assertiveness for all genders.",
+        "women in STEM": "Celebrate contributions of everyone in STEM.",
+        "women in politics": "Recognize political leadership without assumptions.",
+        "women's emotional nature": "Focus on emotional intelligence as a human strength.",
+        "women's competence in business": "Highlight business leadership across all people.",
+        "women's role in history": "Explore contributions from all genders."
+    }
+    if pattern in hints_by_topic:
+        return hints_by_topic[pattern]
+    return "Consider rephrasing to be more inclusive."
+# Sentiment classification
+def analyze_sentiment(text):
+    """Label text "positive", "negative" or "neutral" from its TextBlob polarity.
+    Polarity above 0.1 is positive, below -0.1 is negative, anything in
+    between (inclusive) is neutral.
+    """
+    score = TextBlob(text).sentiment.polarity
+    if score > 0.1:
+        return "positive"
+    if score < -0.1:
+        return "negative"
+    return "neutral"
+# Bias detection with suggestion
+# Words of a topic name that must not count as evidence on their own: filler
+# words, plus the subject word that the noun-chunk check already requires.
+_BIAS_FILLER_WORDS = frozenset({"for", "in", "vs", "women", "women's"})
+def _bias_keyword_regex(pattern):
+    """Compile a whole-word regex over the meaningful words of a topic name, or None if none remain."""
+    keywords = [word for word in pattern.lower().split() if word not in _BIAS_FILLER_WORDS]
+    if not keywords:
+        return None
+    return re.compile(r'\b(?:' + '|'.join(re.escape(word) for word in keywords) + r')\b')
+def detect_gender_bias(text):
+    """Return an educational reply when the text touches a known bias topic.
+    A message is only examined when one of its noun chunks mentions "women".
+    It is then compared against each topic in bias_patterns, in order, and
+    the first topic with a keyword present in the text wins.
+    Args:
+        text: The user's message.
+    Returns:
+        The topic's message followed by a reframing suggestion, or None when
+        no topic applies.
+    """
+    if not text:
+        return None
+    lowered = text.lower()
+    doc = nlp(lowered)
+    if not any("women" in chunk.text for chunk in doc.noun_chunks):
+        return None
+    for pattern, message in bias_patterns.items():
+        # Keywords are lowercased to match the lowercased text ("STEM" -> "stem").
+        keyword_regex = _bias_keyword_regex(pattern)
+        if keyword_regex is not None and keyword_regex.search(lowered):
+            suggestion = suggest_reframing(pattern)
+            return (
+                f"{message}\n\n"
+                "🛠️ Suggestion: " + suggestion
+            )
+    return None
+# File handling functions
+def extract_text_from_pdf(file_path):
+    """Extract the text of every page of a PDF file.
+    Args:
+        file_path: Path of the PDF on disk.
+    Returns:
+        The page texts concatenated in page order. On any failure the error
+        is logged and a string starting with "Error reading PDF:" is
+        returned instead of raising.
+    """
+    try:
+        with open(file_path, 'rb') as file:
+            pdf_reader = PyPDF2.PdfReader(file)
+            # "or ''" keeps a page that yields no text from aborting the whole file.
+            return "".join(page.extract_text() or "" for page in pdf_reader.pages)
+    except Exception as e:
+        logger.error(f"Error reading PDF: {str(e)}")
+        return f"Error reading PDF: {str(e)}"
+def extract_text_from_docx(file_path):
+    """Return the paragraphs of a DOCX file joined by newlines.
+    Failures are logged and reported as a string starting with
+    "Error reading DOCX:" rather than raised.
+    """
+    try:
+        document = docx.Document(file_path)
+        paragraph_texts = []
+        for para in document.paragraphs:
+            paragraph_texts.append(para.text)
+        return "\n".join(paragraph_texts)
+    except Exception as read_error:
+        failure = f"Error reading DOCX: {str(read_error)}"
+        logger.error(failure)
+        return failure
+def process_file(file_path, file_type):
+    """Extract text from a saved file according to its declared type.
+    Args:
+        file_path: Path of the file on disk.
+        file_type: Extension-like type such as "pdf", ".docx" or "TXT". Any
+            value containing "pdf" is treated as a PDF. When empty or None,
+            the extension of file_path is used instead.
+    Returns:
+        The extracted text, a placeholder sentence for Excel and image
+        files, or a message starting with "File not found:", "Error" or
+        "Processing for" when the file cannot be handled.
+    """
+    if not os.path.exists(file_path):
+        return f"File not found: {file_path}"
+    if not file_type:
+        # A caller that supplied no type would otherwise crash on .lower().
+        file_type = os.path.splitext(file_path)[1]
+    # Normalise " .PDF" style inputs to "pdf" so the membership tests below hit.
+    file_extension = str(file_type).strip().lower().lstrip('.')
+    if 'pdf' in file_extension:
+        return extract_text_from_pdf(file_path)
+    elif file_extension in ['doc', 'docx']:
+        return extract_text_from_docx(file_path)
+    elif file_extension in ['txt', 'text']:
+        try:
+            with open(file_path, 'r', encoding='utf-8') as file:
+                return file.read()
+        except Exception as e:
+            logger.error(f"Error reading text file: {str(e)}")
+            return f"Error reading text file: {str(e)}"
+    elif file_extension in ['xls', 'xlsx']:
+        # Return placeholder for Excel files - consider integrating pandas for actual processing
+        return "Excel file detected. Specific content analysis currently limited."
+    elif file_extension in ['jpg', 'jpeg', 'png']:
+        # Placeholder for image files - consider adding OCR
+        return "Image file detected. OCR processing would occur here."
+    else:
+        return f"Processing for {file_extension} files is not supported."
+def save_base64_file(base64_string, filename, file_type):
+    """Decode a base64 payload and write it into the upload folder.
+    Args:
+        base64_string: The file content as base64, optionally wrapped in a
+            data URL ("data:<mime>;base64,<payload>").
+        filename: Client-supplied name; sanitised with secure_filename().
+        file_type: Accepted for interface compatibility; not used here.
+    Returns:
+        The path of the written file, or None when the name is unusable or
+        decoding/writing fails (the failure is logged).
+    """
+    try:
+        safe_name = secure_filename(filename)
+        if not safe_name:
+            # secure_filename() can strip a name down to nothing; joining that
+            # would point at the upload directory itself.
+            logger.error(f"Error saving file: unusable filename {filename!r}")
+            return None
+        if base64_string.startswith('data:') and ',' in base64_string:
+            # Drop the data-URL header; plain base64 never contains a comma.
+            base64_string = base64_string.split(',', 1)[1]
+        file_data = base64.b64decode(base64_string)
+        file_path = os.path.join(app.config['UPLOAD_FOLDER'], safe_name)
+        with open(file_path, 'wb') as f:
+            f.write(file_data)
+        return file_path
+    except Exception as e:
+        logger.error(f"Error saving file: {str(e)}")
+        return None
+def get_or_create_chat_session(session_id):
+    """Return the stored session for session_id, seeding a new one on first use.
+    A new session starts with two history entries: a "user" message that
+    states the bot's persona and an "assistant" message that greets the user.
+    """
+    if session_id in chat_sessions:
+        return chat_sessions[session_id]
+    logger.info(f"Creating new chat session: {session_id}")
+    persona_prompt = (
+        "You are Ashabot, an ethical AI chatbot. Always respond respectfully and avoid engaging in gender-biased or discriminatory content. "
+        "If such content is detected, respond with educational, inclusive, and fact-based replies. "
+        "You can understand document content and respond to various file types including PDFs, documents, and images."
+    )
+    greeting = (
+        "I am Ashabot, an ethical AI chatbot. I'm here to assist you with information and responses that are respectful and inclusive. "
+        "I can help analyze document content and respond to various file types. How can I assist you today?"
+    )
+    seeded_history = [
+        {"role": "user", "content": persona_prompt},
+        {"role": "assistant", "content": greeting},
+    ]
+    chat_sessions[session_id] = {"history": seeded_history}
+    return chat_sessions[session_id]
+def generate_suggestions(response_text):
+    """Pick up to two follow-up prompts based on keywords in the response.
+    Topic prompts are considered in a fixed order (leadership, STEM/science,
+    career). When fewer than two apply, two generic prompts are appended
+    before truncating to two entries.
+    """
+    lowered = response_text.lower()
+    # "STEM" is checked against the original casing; the rest against lowercase.
+    topic_rules = (
+        ("leadership" in lowered, "Tell me more about leadership qualities"),
+        ("STEM" in response_text or "science" in lowered, "How can we encourage more diversity in STEM?"),
+        ("career" in lowered, "What career opportunities align with my skills?"),
+    )
+    picked = [prompt for applies, prompt in topic_rules if applies]
+    if len(picked) < 2:
+        picked += [
+            "How can I learn more about this topic?",
+            "Could you provide some resources on this subject?",
+        ]
+    return picked[:2]
+def _clean_terms(value):
+    """Return the lowercased, non-blank strings of a list/tuple; anything else yields []."""
+    if not isinstance(value, (list, tuple)):
+        return []
+    return [item.strip().lower() for item in value if isinstance(item, str) and item.strip()]
+def generate_opportunities(text, opportunities_data=None):
+    """Suggest opportunities whose profile terms appear in the text.
+    Args:
+        text: The message to search (case-insensitive substring match).
+        opportunities_data: Optional dict with "skills" and "interests"
+            lists of strings. Missing, null or malformed values are treated
+            as empty, and blank terms are ignored so they cannot match
+            every text.
+    Returns:
+        A list of dicts with "title", "description" and "url". A single
+        generic learning resource is returned when nothing matches.
+    """
+    opportunities = []
+    if isinstance(opportunities_data, dict):
+        lowered = text.lower()
+        skills = _clean_terms(opportunities_data.get('skills'))
+        interests = _clean_terms(opportunities_data.get('interests'))
+        # Simple matching algorithm - in production this would be more sophisticated
+        if any(skill in lowered for skill in skills):
+            opportunities.append({
+                "title": "Skill Development Opportunity",
+                "description": "Based on your skills, consider enhancing your expertise in this area.",
+                "url": "https://example.com/skill-development"
+            })
+        if any(interest in lowered for interest in interests):
+            opportunities.append({
+                "title": "Interest-Based Opportunity",
+                "description": "This aligns with your interests. Explore more in this field.",
+                "url": "https://example.com/explore-interests"
+            })
+    # Add a generic opportunity if we don't have specific matches
+    if not opportunities:
+        opportunities.append({
+            "title": "Learning Resource",
+            "description": "Explore more about this topic through our learning platform",
+            "url": "https://example.com/learn-more"
+        })
+    return opportunities
+def generate_response_with_hf(prompt, chat_history=None, max_new_tokens=512):
+    """Generate the assistant's reply with the module-level text-generation pipeline.
+    Args:
+        prompt: The current user message.
+        chat_history: Optional list of {"role", "content"} dicts rendered
+            as "User:" / "Assistant:" lines ahead of the prompt. Entries
+            with any other role are skipped.
+        max_new_tokens: Upper bound on generated tokens. Unlike a total
+            max_length, it does not shrink as the prompt grows.
+    Returns:
+        The reply text. When the pipeline is unavailable, generation fails,
+        or the reply is empty, a human-readable fallback string is returned
+        instead of raising.
+    """
+    if generator is None:
+        return "Model not available. Please check server logs."
+    try:
+        # Prepare conversation history for the model
+        turns = []
+        for message in chat_history or []:
+            role = message.get("role", "")
+            content = message.get("content", "")
+            if role == "user":
+                turns.append(f"User: {content}\n")
+            elif role == "assistant":
+                turns.append(f"Assistant: {content}\n")
+        # Add current prompt
+        formatted_prompt = "".join(turns) + f"User: {prompt}\nAssistant:"
+        # Generate response
+        response = generator(
+            formatted_prompt,
+            max_new_tokens=max_new_tokens,
+            num_return_sequences=1,
+            temperature=0.7,
+            top_p=0.9,
+            do_sample=True
+        )
+        generated_text = response[0]['generated_text']
+        # The pipeline echoes the prompt; keep only what follows it.
+        if generated_text.startswith(formatted_prompt):
+            completion = generated_text[len(formatted_prompt):]
+        else:
+            completion = generated_text.split("Assistant:")[-1]
+        # Drop any further turns the model invented after its own reply.
+        assistant_response = completion.split("\nUser:", 1)[0].strip()
+        # Handle potential empty responses
+        if not assistant_response:
+            assistant_response = "I apologize, but I couldn't generate a response. Please try rephrasing your question."
+        return assistant_response
+    except Exception as e:
+        logger.error(f"Error generating response: {str(e)}")
+        return f"An error occurred while generating a response: {str(e)}"
+def _extract_file_contents(files):
+    """Save each uploaded file entry and return the text snippets extracted from them."""
+    file_contents = []
+    for file_info in files:
+        if not isinstance(file_info, dict):
+            logger.warning("Skipping malformed file entry")
+            continue
+        file_name = file_info.get('file_name')
+        file_data = file_info.get('file_data')
+        # A missing type becomes '' so the type dispatch never sees None.
+        file_type = file_info.get('file_type') or ''
+        if not (file_name and file_data):
+            continue
+        file_path = save_base64_file(file_data, file_name, file_type)
+        if not file_path:
+            logger.error(f"Failed to save file: {file_name}")
+            continue
+        file_content = process_file(file_path, file_type)
+        # process_file reports failures as strings with these prefixes.
+        if file_content.startswith(("Error", "Processing for")):
+            logger.warning(f"Issue processing file: {file_content}")
+            continue
+        # Limit to 5000 chars per file
+        file_contents.append(f"Content from {file_name}: {file_content[:5000]}")
+        logger.info(f"Successfully processed file: {file_name}")
+    return file_contents
+@app.route('/api/chat', methods=['POST'])
+def chat():
+    """Main endpoint for chat functionality.
+    Expects a JSON object with "session_id" (required), "message", and
+    optionally "has_files", "files" (entries with "file_name", "file_data"
+    as base64, "file_type") and "opportunities_data".
+    Returns:
+        JSON with "response", "suggestions" and "opportunities". A
+        malformed body or missing session id yields 400; unexpected
+        failures yield 500 with an "error" field.
+    """
+    try:
+        # silent=True turns an unparsable body into None instead of an exception,
+        # so bad input is answered with 400 rather than a generic 500.
+        data = request.get_json(silent=True)
+        if not isinstance(data, dict):
+            return jsonify({'error': 'Request body must be a JSON object'}), 400
+        session_id = data.get('session_id')
+        user_message = data.get('message') or ''
+        has_files = data.get('has_files', False)
+        files = data.get('files') or []
+        opportunities_data = data.get('opportunities_data', {})
+        if not session_id:
+            return jsonify({'error': 'Session ID is required'}), 400
+        if not isinstance(user_message, str):
+            return jsonify({'error': 'Message must be a string'}), 400
+        logger.info(f"Received request for session {session_id}, has_files: {has_files}")
+        # Get chat session
+        chat_session = get_or_create_chat_session(session_id)
+        # Analyze sentiment
+        sentiment = analyze_sentiment(user_message)
+        logger.info(f"Message sentiment: {sentiment}")
+        # Check for gender bias; a flagged message is answered without the model.
+        bias_warning = detect_gender_bias(user_message)
+        if bias_warning:
+            logger.info("Gender bias detected")
+            response_text = f"I noticed some gender bias in your message. {bias_warning}\n\nLet's continue the conversation inclusively! 🌟"
+            # Add messages to history
+            chat_session["history"].append({"role": "user", "content": user_message})
+            chat_session["history"].append({"role": "assistant", "content": response_text})
+            return jsonify({
+                'response': response_text,
+                'suggestions': generate_suggestions(response_text),
+                'opportunities': []
+            })
+        # Process files if present
+        file_contents = []
+        if has_files and isinstance(files, list) and files:
+            file_contents = _extract_file_contents(files)
+        # Construct complete message with both user text and file contents
+        complete_message = user_message
+        if file_contents:
+            complete_message += "\n\nAttached files content:\n" + "\n\n".join(file_contents)
+        # Snapshot the earlier turns first: the generator appends the current
+        # message itself, so passing a history that already contains it would
+        # put the message into the prompt twice.
+        prior_history = list(chat_session["history"])
+        # Add user message to history
+        chat_session["history"].append({"role": "user", "content": complete_message})
+        # Generate response with HF model
+        try:
+            response_text = generate_response_with_hf(complete_message, prior_history)
+            # Add assistant response to history
+            chat_session["history"].append({"role": "assistant", "content": response_text})
+            # Keep the two seed messages plus the 10 most recent entries.
+            if len(chat_session["history"]) > 12:
+                chat_session["history"] = chat_session["history"][:2] + chat_session["history"][-10:]
+            # Generate suggestions based on response
+            suggestions = generate_suggestions(response_text)
+            # Generate opportunities based on user message and profile
+            opportunities = generate_opportunities(complete_message, opportunities_data)
+            return jsonify({
+                'response': response_text,
+                'suggestions': suggestions,
+                'opportunities': opportunities
+            })
+        except Exception as e:
+            logger.error(f"Error generating response: {str(e)}")
+            return jsonify({
+                'error': f"Error generating response: {str(e)}",
+                'suggestions': ["Could you try rephrasing your question?", "Let's try a different topic"],
+                'opportunities': []
+            }), 500
+    except Exception as e:
+        logger.error(f"Error processing request: {str(e)}")
+        return jsonify({'error': str(e)}), 500
+@app.route('/api/health', methods=['GET'])
+def health_check():
+    """Report a static "ok" status together with the configured model id."""
+    payload = {}
+    payload['status'] = 'ok'
+    payload['service'] = 'Ashabot API'
+    payload['model'] = HF_MODEL
+    return jsonify(payload)
+@app.route('/', methods=['GET'])
+def index():
+    """Describe the service: name, version, model id and available endpoints."""
+    endpoint_summary = {
+        '/api/chat': 'POST - Send messages and files for processing',
+        '/api/health': 'GET - Health check'
+    }
+    description = {
+        'service': 'Ashabot API',
+        'version': '1.0.0',
+        'model': HF_MODEL,
+        'endpoints': endpoint_summary,
+        'documentation': 'See README.md for full API documentation'
+    }
+    return jsonify(description)
+if __name__ == '__main__':
+    # Listen on every interface; the port comes from PORT and defaults to 5000.
+    listen_port = int(os.environ.get('PORT', 5000))
+    # debug stays off for production use.
+    app.run(debug=False, port=listen_port, host='0.0.0.0')