# Ashabot API — Flask backend service.
# (The original "Spaces:" / "Runtime error" lines were page-scrape artifacts, not code.)
# Standard library
import base64
import json
import logging
import os
import re
import tempfile
import threading

# Third-party
import docx
import PyPDF2
import pyttsx3
import spacy
from flask import Flask, request, jsonify
from flask_cors import CORS
from textblob import TextBlob
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from werkzeug.utils import secure_filename
# --- Logging ---------------------------------------------------------------
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# --- Flask application -----------------------------------------------------
app = Flask(__name__)
CORS(app)  # Enable CORS for all routes

# --- Upload configuration --------------------------------------------------
UPLOAD_FOLDER = os.path.join(os.getcwd(), 'uploads')
os.makedirs(UPLOAD_FOLDER, exist_ok=True)  # idempotent: no-op when it already exists
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config['MAX_CONTENT_LENGTH'] = 20 * 1024 * 1024  # 20MB max upload

# --- Hugging Face model selection ------------------------------------------
HF_MODEL = os.environ.get('HF_MODEL', "mistralai/Mistral-7B-Instruct-v0.2")
logger.info(f"Using Hugging Face model: {HF_MODEL}")

# In-memory store of per-session chat histories, keyed by session id.
# NOTE(review): not persisted and not shared across workers — confirm single-process deployment.
chat_sessions = {}
# --- spaCy pipeline --------------------------------------------------------
# Prefer the small English model; fall back to the medium model, and as a
# last resort install a stub `nlp` so bias detection degrades gracefully
# instead of crashing at import time.
try:
    nlp = spacy.load("en_core_web_sm")
    logger.info("Successfully loaded spaCy model")
except Exception as e:
    logger.error(f"Failed to load spaCy model: {str(e)}")
    try:
        nlp = spacy.load("en_core_web_md")
        logger.info("Loaded fallback spaCy model")
    # Bug fix: was a bare `except:`, which also swallows KeyboardInterrupt
    # and SystemExit; narrow it to Exception.
    except Exception:
        logger.error("Could not load any spaCy model")

        def nlp(text):
            """Stub pipeline exposing the attributes detect_gender_bias reads."""
            class MockDoc:
                def __init__(self, text):
                    self.text = text
                    self.noun_chunks = []  # empty → bias detection always passes through
            return MockDoc(text)
# --- Text-to-speech --------------------------------------------------------
# `engine` stays None when no audio backend is available; callers must
# treat TTS as an optional feature.
engine = None
try:
    engine = pyttsx3.init()
except Exception as e:
    logger.error(f"Failed to initialize text-to-speech: {str(e)}")
else:
    logger.info("Text-to-speech engine initialized")
def load_hf_model():
    """Build a text-generation pipeline for HF_MODEL.

    Returns the Hugging Face pipeline on success, or None when the model
    or tokenizer cannot be loaded (the error is logged).
    """
    try:
        logger.info(f"Loading model: {HF_MODEL}")
        tok = AutoTokenizer.from_pretrained(HF_MODEL)
        mdl = AutoModelForCausalLM.from_pretrained(HF_MODEL)
        pipe = pipeline("text-generation", model=mdl, tokenizer=tok)
    except Exception as e:
        logger.error(f"Error loading model: {str(e)}")
        return None
    logger.info("Successfully loaded model and tokenizer")
    return pipe


# Load the model once at import time; `generator` is None if loading failed.
generator = load_hf_model()
# Bias-detection patterns mapped to empowering counter-messages.
# Keys are matched word-by-word against the user's message by
# detect_gender_bias(); values are returned verbatim to the user.
bias_patterns = {
    "suitability for leadership": "Absolutely! Women have led globally—in government, business, and science.",
    "emotional stability": "Emotional intelligence is a leadership asset for everyone.",
    "tech ability": "Women are innovators in tech—from Ada Lovelace to today's pioneers.",
    "logical thinking": "Logic is a human ability, not gender-specific.",
    "career vs family": "Many women successfully balance career and family. Stereotypes don't define reality.",
    "aggressiveness in women": "Assertiveness is a leadership strength for all genders.",
    "women in STEM": "Women have been crucial in STEM fields, past and present.",
    "women in politics": "Women have led nations and made major political impacts globally.",
    "women's emotional nature": "Emotions are part of being human and a leadership strength.",
    "women's competence in business": "Women are highly competent business leaders and entrepreneurs.",
    "women's role in history": "Women have made monumental contributions across history.",
}
def suggest_reframing(pattern):
    """Return an inclusive rephrasing tip for a detected bias pattern.

    Unknown patterns get a generic "rephrase" suggestion rather than an error.
    """
    reframes = {
        "suitability for leadership": "Ask about leadership qualities in all individuals.",
        "emotional stability": "Focus on emotional intelligence across all leaders.",
        "tech ability": "Highlight tech expertise without linking to gender.",
        "logical thinking": "Emphasize logical thinking as a universal human trait.",
        "career vs family": "Discuss career and family balance inclusively.",
        "aggressiveness in women": "Celebrate assertiveness for all genders.",
        "women in STEM": "Celebrate contributions of everyone in STEM.",
        "women in politics": "Recognize political leadership without assumptions.",
        "women's emotional nature": "Focus on emotional intelligence as a human strength.",
        "women's competence in business": "Highlight business leadership across all people.",
        "women's role in history": "Explore contributions from all genders.",
    }
    return reframes.get(pattern, "Consider rephrasing to be more inclusive.")
def analyze_sentiment(text):
    """Classify text polarity via TextBlob.

    Returns "positive" (polarity > 0.1), "negative" (polarity < -0.1),
    or "neutral" for the band in between.
    """
    polarity = TextBlob(text).sentiment.polarity
    if polarity > 0.1:
        return "positive"
    if polarity < -0.1:
        return "negative"
    return "neutral"
def detect_gender_bias(text):
    """Scan a message for gender-biased framings about women.

    When "women" appears in a noun phrase AND every word of one of the
    bias_patterns keys occurs in the text, returns the corresponding
    empowering message plus a reframing suggestion; otherwise None.
    """
    lowered = text.lower()
    doc = nlp(lowered)
    # Only proceed when "women" actually appears inside a noun phrase,
    # mirroring the original gating on noun chunks.
    if not any("women" in chunk.text for chunk in doc.noun_chunks):
        return None
    for pattern, message in bias_patterns.items():
        # Bug fix: the old regex OR-ed the pattern's words together
        # (r'\bw1\b|\bw2\b'), so a lone stopword such as "in" or "vs"
        # triggered a pattern. Require EVERY word to be present, and
        # escape words (keys contain apostrophes, e.g. "women's").
        if all(re.search(r'\b' + re.escape(word) + r'\b', lowered)
               for word in pattern.split()):
            suggestion = suggest_reframing(pattern)
            return (
                f"{message}\n\n"
                "🛠️ Suggestion: " + suggestion
            )
    return None
# File handling functions
def extract_text_from_pdf(file_path):
    """Extract and concatenate the text of every page in a PDF.

    Returns the extracted text, or an "Error reading PDF: ..." string on
    failure — callers check for that prefix instead of catching exceptions.
    """
    try:
        with open(file_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            # Iterate pages directly and join once, replacing the old
            # range(len(...)) loop with quadratic `+=` concatenation.
            # extract_text() can return None for image-only pages, which
            # previously raised a TypeError; treat that as empty text.
            return "".join(page.extract_text() or "" for page in pdf_reader.pages)
    except Exception as e:
        logger.error(f"Error reading PDF: {str(e)}")
        return f"Error reading PDF: {str(e)}"
def extract_text_from_docx(file_path):
    """Return the text of a DOCX file, one paragraph per line.

    On failure, logs and returns an "Error reading DOCX: ..." string
    (same error-as-string convention as the other extractors).
    """
    try:
        document = docx.Document(file_path)
        paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
        return "\n".join(paragraph_texts)
    except Exception as e:
        logger.error(f"Error reading DOCX: {str(e)}")
        return f"Error reading DOCX: {str(e)}"
def process_file(file_path, file_type):
    """Dispatch text extraction based on the (lowercased) file type.

    `file_type` may be a bare extension or a longer type string; PDF is
    detected by substring so e.g. "application/pdf" also matches.
    Returns extracted text, a placeholder message for Excel/image files,
    or an error/unsupported message string.
    """
    if not os.path.exists(file_path):
        return f"File not found: {file_path}"

    ext = file_type.lower()

    if 'pdf' in ext:
        return extract_text_from_pdf(file_path)
    if ext in ('doc', 'docx'):
        return extract_text_from_docx(file_path)
    if ext in ('txt', 'text'):
        try:
            with open(file_path, 'r', encoding='utf-8') as handle:
                return handle.read()
        except Exception as e:
            logger.error(f"Error reading text file: {str(e)}")
            return f"Error reading text file: {str(e)}"
    if ext in ('xls', 'xlsx'):
        # Placeholder — integrate pandas for real spreadsheet parsing.
        return "Excel file detected. Specific content analysis currently limited."
    if ext in ('jpg', 'jpeg', 'png'):
        # Placeholder — add OCR for actual image content extraction.
        return "Image file detected. OCR processing would occur here."
    return f"Processing for {ext} files is not supported."
def save_base64_file(base64_string, filename, file_type):
    """Decode a base64 payload and write it into the upload folder.

    The filename is sanitized with secure_filename to block path traversal.
    Returns the saved path, or None if decoding or writing fails.
    `file_type` is accepted for interface symmetry but unused here.
    """
    try:
        payload = base64.b64decode(base64_string)
        destination = os.path.join(app.config['UPLOAD_FOLDER'], secure_filename(filename))
        with open(destination, 'wb') as out:
            out.write(payload)
        return destination
    except Exception as e:
        logger.error(f"Error saving file: {str(e)}")
        return None
def get_or_create_chat_session(session_id):
    """Return the chat session for `session_id`, creating it if absent.

    New sessions are seeded with a two-message priming exchange that
    establishes the bot's ethical, inclusive persona.
    """
    if session_id in chat_sessions:
        return chat_sessions[session_id]

    logger.info(f"Creating new chat session: {session_id}")
    chat_sessions[session_id] = {
        "history": [
            {
                "role": "user",
                "content": "You are Ashabot, an ethical AI chatbot. Always respond respectfully and avoid engaging in gender-biased or discriminatory content. "
                           "If such content is detected, respond with educational, inclusive, and fact-based replies. "
                           "You can understand document content and respond to various file types including PDFs, documents, and images."
            },
            {
                "role": "assistant",
                "content": "I am Ashabot, an ethical AI chatbot. I'm here to assist you with information and responses that are respectful and inclusive. "
                           "I can help analyze document content and respond to various file types. How can I assist you today?"
            },
        ]
    }
    return chat_sessions[session_id]
def generate_suggestions(response_text):
    """Pick up to two follow-up prompts keyed off topics in the response.

    Topical prompts fire on "leadership", "STEM"/"science", and "career";
    generic prompts pad the list when fewer than two topics matched.
    """
    lowered = response_text.lower()
    suggestions = []
    if "leadership" in lowered:
        suggestions.append("Tell me more about leadership qualities")
    # NOTE: "STEM" is deliberately checked case-sensitively on the raw text.
    if "STEM" in response_text or "science" in lowered:
        suggestions.append("How can we encourage more diversity in STEM?")
    if "career" in lowered:
        suggestions.append("What career opportunities align with my skills?")
    if len(suggestions) < 2:
        suggestions += [
            "How can I learn more about this topic?",
            "Could you provide some resources on this subject?",
        ]
    return suggestions[:2]  # cap at two suggestions
def generate_opportunities(text, opportunities_data=None):
    """Match user text against profile skills/interests to suggest opportunities.

    `opportunities_data` is an optional dict with "skills" and "interests"
    lists. Returns a list of {title, description, url} dicts, falling back
    to a single generic learning resource when nothing matches.
    """
    lowered = text.lower()
    opportunities = []

    if opportunities_data:
        profile_skills = opportunities_data.get('skills', [])
        profile_interests = opportunities_data.get('interests', [])
        # Naive substring matching — a production system would use a
        # more sophisticated matcher.
        if any(skill.lower() in lowered for skill in profile_skills):
            opportunities.append({
                "title": "Skill Development Opportunity",
                "description": "Based on your skills, consider enhancing your expertise in this area.",
                "url": "https://example.com/skill-development"
            })
        if any(interest.lower() in lowered for interest in profile_interests):
            opportunities.append({
                "title": "Interest-Based Opportunity",
                "description": "This aligns with your interests. Explore more in this field.",
                "url": "https://example.com/explore-interests"
            })

    if not opportunities:
        opportunities.append({
            "title": "Learning Resource",
            "description": "Explore more about this topic through our learning platform",
            "url": "https://example.com/learn-more"
        })
    return opportunities
def generate_response_with_hf(prompt, chat_history=None):
    """Generate an assistant reply with the Hugging Face pipeline.

    The history is flattened into a "User:/Assistant:" transcript ending
    with the new prompt; the text after the final "Assistant:" marker in
    the model output is returned. Falls back to explanatory strings when
    the model is unavailable or generation fails.
    """
    if generator is None:
        return "Model not available. Please check server logs."
    try:
        # Flatten prior turns into a plain transcript.
        parts = []
        for message in (chat_history or []):
            role = message.get("role", "")
            content = message.get("content", "")
            if role == "user":
                parts.append(f"User: {content}\n")
            elif role == "assistant":
                parts.append(f"Assistant: {content}\n")
        parts.append(f"User: {prompt}\nAssistant:")
        formatted_prompt = "".join(parts)

        result = generator(
            formatted_prompt,
            max_length=1024,
            num_return_sequences=1,
            temperature=0.7,
            top_p=0.9,
            do_sample=True
        )
        # Keep only what follows the last "Assistant:" marker.
        reply = result[0]['generated_text'].split("Assistant:")[-1].strip()
        if not reply:
            reply = "I apologize, but I couldn't generate a response. Please try rephrasing your question."
        return reply
    except Exception as e:
        logger.error(f"Error generating response: {str(e)}")
        return f"An error occurred while generating a response: {str(e)}"
@app.route('/api/chat', methods=['POST'])
def chat():
    """Main endpoint for chat functionality.

    Expects a JSON body with: session_id (required), message, has_files,
    files (list of {file_name, file_data (base64), file_type}), and
    opportunities_data ({skills: [...], interests: [...]}).
    Returns JSON with the response text, follow-up suggestions, and
    matched opportunities; 400 without a session id, 500 on failure.

    NOTE(review): the route decorator was missing even though index()
    documents POST /api/chat — without it the handler is never registered.
    """
    try:
        data = request.json
        session_id = data.get('session_id')
        user_message = data.get('message', '')
        has_files = data.get('has_files', False)
        files = data.get('files', [])
        opportunities_data = data.get('opportunities_data', {})

        if not session_id:
            return jsonify({'error': 'Session ID is required'}), 400
        logger.info(f"Received request for session {session_id}, has_files: {has_files}")

        chat_session = get_or_create_chat_session(session_id)

        # Sentiment is logged for observability only; it does not change the reply.
        sentiment = analyze_sentiment(user_message)
        logger.info(f"Message sentiment: {sentiment}")

        # Short-circuit with an educational reply when gender bias is detected.
        bias_warning = detect_gender_bias(user_message)
        if bias_warning:
            logger.info("Gender bias detected")
            response_text = f"I noticed some gender bias in your message. {bias_warning}\n\nLet's continue the conversation inclusively! 🌟"
            chat_session["history"].append({"role": "user", "content": user_message})
            chat_session["history"].append({"role": "assistant", "content": response_text})
            return jsonify({
                'response': response_text,
                'suggestions': generate_suggestions(response_text),
                'opportunities': []
            })

        # Decode, save, and extract text from any attached files.
        file_contents = []
        if has_files and files:
            for file_info in files:
                file_name = file_info.get('file_name')
                file_data = file_info.get('file_data')
                file_type = file_info.get('file_type')
                if file_name and file_data:
                    file_path = save_base64_file(file_data, file_name, file_type)
                    if file_path:
                        file_content = process_file(file_path, file_type)
                        # Extractors signal failure via "Error ..." /
                        # "Processing for ..." prefixes rather than exceptions.
                        if not file_content.startswith("Error") and not file_content.startswith("Processing for"):
                            # Cap per-file content to keep the model prompt bounded.
                            file_contents.append(f"Content from {file_name}: {file_content[:5000]}")
                            logger.info(f"Successfully processed file: {file_name}")
                        else:
                            logger.warning(f"Issue processing file: {file_content}")
                    else:
                        logger.error(f"Failed to save file: {file_name}")

        # Combine the user's text with the extracted file content.
        complete_message = user_message
        if file_contents:
            complete_message += "\n\nAttached files content:\n" + "\n\n".join(file_contents)

        chat_session["history"].append({"role": "user", "content": complete_message})

        try:
            response_text = generate_response_with_hf(complete_message, chat_session["history"])
            chat_session["history"].append({"role": "assistant", "content": response_text})

            # Trim history: keep the two priming messages plus the last 10 turns.
            if len(chat_session["history"]) > 12:
                chat_session["history"] = chat_session["history"][:2] + chat_session["history"][-10:]

            suggestions = generate_suggestions(response_text)
            opportunities = generate_opportunities(complete_message, opportunities_data)

            return jsonify({
                'response': response_text,
                'suggestions': suggestions,
                'opportunities': opportunities
            })
        except Exception as e:
            logger.error(f"Error generating response: {str(e)}")
            return jsonify({
                'error': f"Error generating response: {str(e)}",
                'suggestions': ["Could you try rephrasing your question?", "Let's try a different topic"],
                'opportunities': []
            }), 500
    except Exception as e:
        logger.error(f"Error processing request: {str(e)}")
        return jsonify({'error': str(e)}), 500
@app.route('/api/health', methods=['GET'])
def health_check():
    """Health check endpoint.

    NOTE(review): route decorator restored — index() documents
    GET /api/health but the handler was never registered.
    """
    return jsonify({
        'status': 'ok',
        'service': 'Ashabot API',
        'model': HF_MODEL
    })
@app.route('/', methods=['GET'])
def index():
    """Root endpoint serving a machine-readable API summary.

    NOTE(review): route decorator restored; '/' is the conventional path
    for an index handler — confirm against the original deployment.
    """
    return jsonify({
        'service': 'Ashabot API',
        'version': '1.0.0',
        'model': HF_MODEL,
        'endpoints': {
            '/api/chat': 'POST - Send messages and files for processing',
            '/api/health': 'GET - Health check'
        },
        'documentation': 'See README.md for full API documentation'
    })
if __name__ == '__main__':
    # PORT is supplied by the hosting platform; default to 5000 locally.
    server_port = int(os.environ.get('PORT', 5000))
    # debug=False for production safety (no interactive debugger/reloader).
    app.run(host='0.0.0.0', port=server_port, debug=False)