roshcheeku committed on
Commit
98e78d3
·
verified ·
1 Parent(s): 0aec656

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +463 -0
app.py ADDED
@@ -0,0 +1,463 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import base64
3
+ import json
4
+ from flask import Flask, request, jsonify
5
+ from flask_cors import CORS
6
+ import spacy
7
+ from textblob import TextBlob
8
+ import re
9
+ import tempfile
10
+ import PyPDF2
11
+ import docx
12
+ import pyttsx3
13
+ import threading
14
+ import logging
15
+ from werkzeug.utils import secure_filename
16
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
17
+
18
# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)

# Initialize Flask app
app = Flask(__name__)
CORS(app)  # Enable CORS for all routes

# Configure environment: uploaded files are written to ./uploads under the
# process's current working directory.
UPLOAD_FOLDER = os.path.join(os.getcwd(), 'uploads')
# exist_ok=True replaces the exists()/makedirs() pair, which could raise
# FileExistsError if another process created the directory in between.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config['MAX_CONTENT_LENGTH'] = 20 * 1024 * 1024  # 20MB max upload

# Set up Hugging Face model parameters (overridable through the HF_MODEL env var)
HF_MODEL = os.environ.get('HF_MODEL', "mistralai/Mistral-7B-Instruct-v0.2")
logger.info(f"Using Hugging Face model: {HF_MODEL}")

# Dictionary to store chat sessions, keyed by the client-supplied session id.
# NOTE(review): in-memory only and never pruned; it grows with every new id.
chat_sessions = {}
39
+
40
# Load spaCy model.
# Resolution order: en_core_web_sm, then en_core_web_md, then a stub callable
# so the rest of the module can still call nlp(text).
try:
    nlp = spacy.load("en_core_web_sm")
    logger.info("Successfully loaded spaCy model")
except Exception as e:
    logger.error(f"Failed to load spaCy model: {str(e)}")
    # Try the medium English model in case only that one is installed
    try:
        nlp = spacy.load("en_core_web_md")
        logger.info("Loaded fallback spaCy model")
    except Exception:
        # `except Exception` (not a bare `except:`) so KeyboardInterrupt and
        # SystemExit still propagate during startup.
        logger.error("Could not load any spaCy model")

        # Stub nlp: returns an object exposing only `text` and an empty
        # `noun_chunks`, so code iterating noun chunks finds nothing.
        def nlp(text):
            class MockDoc:
                def __init__(self, text):
                    self.text = text
                    self.noun_chunks = []

            return MockDoc(text)
59
+
60
# Initialize text-to-speech engine; `engine` stays None when pyttsx3 cannot start
engine = None
try:
    engine = pyttsx3.init()
    logger.info("Text-to-speech engine initialized")
except Exception as e:
    logger.error(f"Failed to initialize text-to-speech: {str(e)}")
    engine = None
67
+
68
+ # Load Hugging Face model and tokenizer
69
def load_hf_model():
    """Build the text-generation pipeline for ``HF_MODEL``.

    Returns:
        The transformers pipeline, or ``None`` when the tokenizer, model or
        pipeline could not be created (the error is logged).
    """
    try:
        logger.info(f"Loading model: {HF_MODEL}")
        hf_tokenizer = AutoTokenizer.from_pretrained(HF_MODEL)
        hf_model = AutoModelForCausalLM.from_pretrained(HF_MODEL)
        text_generator = pipeline(
            "text-generation",
            model=hf_model,
            tokenizer=hf_tokenizer,
        )
    except Exception as e:
        logger.error(f"Error loading model: {str(e)}")
        return None
    else:
        logger.info("Successfully loaded model and tokenizer")
        return text_generator
80
+
81
# Load model on startup (None when loading failed; callers must check)
generator = load_hf_model()

# Bias detection patterns and empowering messages.
# Keys are topic phrases; values are the reply shown when that topic is
# detected in a message.
bias_patterns = dict([
    ("suitability for leadership", "Absolutely! Women have led globally—in government, business, and science."),
    ("emotional stability", "Emotional intelligence is a leadership asset for everyone."),
    ("tech ability", "Women are innovators in tech—from Ada Lovelace to today's pioneers."),
    ("logical thinking", "Logic is a human ability, not gender-specific."),
    ("career vs family", "Many women successfully balance career and family. Stereotypes don't define reality."),
    ("aggressiveness in women", "Assertiveness is a leadership strength for all genders."),
    ("women in STEM", "Women have been crucial in STEM fields, past and present."),
    ("women in politics", "Women have led nations and made major political impacts globally."),
    ("women's emotional nature", "Emotions are part of being human and a leadership strength."),
    ("women's competence in business", "Women are highly competent business leaders and entrepreneurs."),
    ("women's role in history", "Women have made monumental contributions across history."),
])
98
+
99
+ # Suggestion for reframing biased questions
100
# Reframing advice per bias topic
_REFRAMES = {
    "suitability for leadership": "Ask about leadership qualities in all individuals.",
    "emotional stability": "Focus on emotional intelligence across all leaders.",
    "tech ability": "Highlight tech expertise without linking to gender.",
    "logical thinking": "Emphasize logical thinking as a universal human trait.",
    "career vs family": "Discuss career and family balance inclusively.",
    "aggressiveness in women": "Celebrate assertiveness for all genders.",
    "women in STEM": "Celebrate contributions of everyone in STEM.",
    "women in politics": "Recognize political leadership without assumptions.",
    "women's emotional nature": "Focus on emotional intelligence as a human strength.",
    "women's competence in business": "Highlight business leadership across all people.",
    "women's role in history": "Explore contributions from all genders.",
}


def suggest_reframing(pattern):
    """Return reframing advice for a bias topic.

    Args:
        pattern: Topic phrase to look up.

    Returns:
        The advice for that topic, or a generic inclusive-rephrasing hint
        when the topic is unknown.
    """
    try:
        return _REFRAMES[pattern]
    except KeyError:
        return "Consider rephrasing to be more inclusive."
115
+
116
+ # Sentiment analysis
117
def analyze_sentiment(text):
    """Classify *text* as "positive", "negative" or "neutral".

    Uses TextBlob's polarity score; scores within [-0.1, 0.1] count as neutral.
    """
    score = TextBlob(text).sentiment.polarity
    if score > 0.1:
        return "positive"
    if score < -0.1:
        return "negative"
    return "neutral"
126
+
127
+ # Bias detection with suggestion
128
# Words that appear in bias_patterns keys but carry no topic information.
# Matching on them would flag almost every sentence that mentions women.
_BIAS_FILLER_WORDS = frozenset({"for", "in", "vs", "women", "women's"})


def detect_gender_bias(text):
    """Return an educational reply when *text* touches a known bias topic.

    A message is considered only if one of its noun chunks mentions "women".
    It is then compared against the distinctive keywords of each
    ``bias_patterns`` key; the first topic with a matching keyword wins.

    Args:
        text: The raw user message.

    Returns:
        The topic's message followed by a reframing suggestion, or ``None``
        when no topic matches.
    """
    lowered = text.lower()
    doc = nlp(lowered)
    if not any("women" in chunk.text for chunk in doc.noun_chunks):
        return None

    for pattern, message in bias_patterns.items():
        # Lowercase the keywords because the text is lowercased too
        # (otherwise "STEM" could never match), and drop filler words.
        keywords = [
            word for word in pattern.lower().split()
            if word not in _BIAS_FILLER_WORDS
        ]
        if not keywords:
            continue
        # re.escape keeps any punctuation in a keyword literal.
        keyword_regex = r'\b(?:' + '|'.join(re.escape(word) for word in keywords) + r')\b'
        if re.search(keyword_regex, lowered):
            suggestion = suggest_reframing(pattern)
            return (
                f"{message}\n\n"
                "🛠️ Suggestion: " + suggestion
            )
    return None
140
+
141
+ # File handling functions
142
def extract_text_from_pdf(file_path):
    """Extract text from PDF files.

    Args:
        file_path: Path of the PDF on disk.

    Returns:
        The concatenated text of all pages, or a string starting with
        "Error reading PDF:" when the file cannot be parsed.
    """
    try:
        with open(file_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            # Iterate pages directly and join once. `or ""` tolerates a page
            # for which extract_text() yields nothing, so one such page does
            # not discard the whole document.
            return "".join(page.extract_text() or "" for page in pdf_reader.pages)
    except Exception as e:
        logger.error(f"Error reading PDF: {str(e)}")
        return f"Error reading PDF: {str(e)}"
154
+
155
def extract_text_from_docx(file_path):
    """Extract text from DOCX files.

    Returns the paragraphs joined by newlines, or a string starting with
    "Error reading DOCX:" when the document cannot be read.
    """
    try:
        document = docx.Document(file_path)
        paragraph_texts = []
        for para in document.paragraphs:
            paragraph_texts.append(para.text)
        return "\n".join(paragraph_texts)
    except Exception as e:
        logger.error(f"Error reading DOCX: {str(e)}")
        return f"Error reading DOCX: {str(e)}"
164
+
165
def process_file(file_path, file_type):
    """Process different file types and extract text.

    Args:
        file_path: Path of the file on disk.
        file_type: Type hint such as "pdf", ".txt" or "application/pdf".
            When empty or ``None`` the extension of *file_path* is used.

    Returns:
        The extracted text, a placeholder sentence for Excel/image files, or
        a message string ("File not found: ...", "Error reading ...",
        "Processing for ... is not supported.") when nothing can be extracted.
    """
    if not os.path.exists(file_path):
        return f"File not found: {file_path}"

    # The type hint is caller-supplied and may be missing; fall back to the
    # path's own extension instead of crashing on None.lower().
    type_hint = file_type if isinstance(file_type, str) and file_type.strip() else ''
    if not type_hint:
        type_hint = os.path.splitext(file_path)[1]
    file_extension = type_hint.strip().lower().lstrip('.')

    # Substring test so MIME-style values such as "application/pdf" also match.
    if 'pdf' in file_extension:
        return extract_text_from_pdf(file_path)
    if file_extension in ('doc', 'docx'):
        # NOTE(review): legacy .doc is routed to the DOCX reader as well;
        # confirm whether that reader can actually open it.
        return extract_text_from_docx(file_path)
    if file_extension in ('txt', 'text'):
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                return file.read()
        except Exception as e:
            logger.error(f"Error reading text file: {str(e)}")
            return f"Error reading text file: {str(e)}"
    if file_extension in ('xls', 'xlsx'):
        # Return placeholder for Excel files - consider integrating pandas for actual processing
        return "Excel file detected. Specific content analysis currently limited."
    if file_extension in ('jpg', 'jpeg', 'png'):
        # Placeholder for image files - consider adding OCR
        return "Image file detected. OCR processing would occur here."
    return f"Processing for {file_extension} files is not supported."
191
+
192
def save_base64_file(base64_string, filename, file_type):
    """Save a base64 encoded file to disk.

    Args:
        base64_string: Base64 payload, optionally prefixed with a data-URL
            header of the form "data:<mime>;base64,".
        filename: Client-supplied name; sanitised before use.
        file_type: Unused here; kept for interface compatibility.

    Returns:
        The path of the written file inside the upload folder, or ``None``
        when decoding or writing fails or the name sanitises to nothing.
    """
    try:
        # Strip a data-URL header. b64decode without validation silently
        # drops the non-alphabet characters and decodes the rest of the
        # header as payload, corrupting the file.
        if isinstance(base64_string, str) and base64_string.startswith('data:') and ',' in base64_string:
            base64_string = base64_string.split(',', 1)[1]
        file_data = base64.b64decode(base64_string)

        safe_name = secure_filename(filename)
        if not safe_name:
            # An empty name would make the target path the upload folder itself.
            logger.error(f"Error saving file: unusable filename {filename!r}")
            return None
        file_path = os.path.join(app.config['UPLOAD_FOLDER'], safe_name)

        with open(file_path, 'wb') as f:
            f.write(file_data)

        return file_path
    except Exception as e:
        logger.error(f"Error saving file: {str(e)}")
        return None
205
+
206
def get_or_create_chat_session(session_id):
    """Return the chat session for *session_id*, creating it on first use.

    A new session starts with a two-message history: a user-role message
    carrying the Ashabot instructions and the assistant's greeting.
    """
    try:
        return chat_sessions[session_id]
    except KeyError:
        pass

    logger.info(f"Creating new chat session: {session_id}")

    instructions = (
        "You are Ashabot, an ethical AI chatbot. Always respond respectfully and avoid engaging in gender-biased or discriminatory content. "
        "If such content is detected, respond with educational, inclusive, and fact-based replies. "
        "You can understand document content and respond to various file types including PDFs, documents, and images."
    )
    greeting = (
        "I am Ashabot, an ethical AI chatbot. I'm here to assist you with information and responses that are respectful and inclusive. "
        "I can help analyze document content and respond to various file types. How can I assist you today?"
    )

    # Initialize with session history
    new_session = {
        "history": [
            {"role": "user", "content": instructions},
            {"role": "assistant", "content": greeting},
        ]
    }
    chat_sessions[session_id] = new_session
    return chat_sessions[session_id]
229
+
230
def generate_suggestions(response_text):
    """Generate follow-up suggestions based on the response.

    Keyword hits add topic-specific suggestions; when fewer than two were
    found, two generic ones are appended. At most two are returned.
    """
    lowered = response_text.lower()

    # (triggered?, suggestion) pairs, in priority order.
    # "STEM" is matched case-sensitively against the original text.
    keyword_rules = (
        ("leadership" in lowered, "Tell me more about leadership qualities"),
        ("STEM" in response_text or "science" in lowered, "How can we encourage more diversity in STEM?"),
        ("career" in lowered, "What career opportunities align with my skills?"),
    )
    picked = [suggestion for triggered, suggestion in keyword_rules if triggered]

    # Add generic suggestions if we don't have specific ones
    if len(picked) < 2:
        picked += [
            "How can I learn more about this topic?",
            "Could you provide some resources on this subject?",
        ]

    return picked[:2]
252
+
253
def _mentions_any(lowered_text, terms):
    """Return True when any non-blank string in *terms* occurs in *lowered_text*."""
    if isinstance(terms, str):
        # A bare string is one term, not a sequence of characters.
        terms = [terms]
    if not isinstance(terms, (list, tuple, set, frozenset)):
        return False
    for term in terms:
        # Skip non-strings and blanks: "" is a substring of every text and
        # would otherwise always count as a match.
        if isinstance(term, str) and term.strip() and term.strip().lower() in lowered_text:
            return True
    return False


def generate_opportunities(text, opportunities_data=None):
    """Generate potential opportunities based on user input and profile data.

    Args:
        text: The user's message (plus any attached file text).
        opportunities_data: Optional dict with "skills" and "interests"
            lists of strings. Missing, empty or malformed values are ignored.

    Returns:
        A list of dicts with "title", "description" and "url". Contains the
        skill and/or interest entry when a listed term occurs in *text*
        (case-insensitive), otherwise a single generic learning resource.
    """
    opportunities = []

    if isinstance(opportunities_data, dict) and opportunities_data:
        lowered_text = text.lower()

        # Simple matching algorithm - in production this would be more sophisticated
        if _mentions_any(lowered_text, opportunities_data.get('skills', [])):
            opportunities.append({
                "title": "Skill Development Opportunity",
                "description": "Based on your skills, consider enhancing your expertise in this area.",
                "url": "https://example.com/skill-development"
            })

        if _mentions_any(lowered_text, opportunities_data.get('interests', [])):
            opportunities.append({
                "title": "Interest-Based Opportunity",
                "description": "This aligns with your interests. Explore more in this field.",
                "url": "https://example.com/explore-interests"
            })

    # Add a generic opportunity if we don't have specific matches
    if not opportunities:
        opportunities.append({
            "title": "Learning Resource",
            "description": "Explore more about this topic through our learning platform",
            "url": "https://example.com/learn-more"
        })

    return opportunities
285
+
286
def generate_response_with_hf(prompt, chat_history=None):
    """Generate response using Hugging Face model.

    Args:
        prompt: The current user message.
        chat_history: Optional list of {"role", "content"} dicts. Only
            "user" and "assistant" roles are rendered into the prompt.

    Returns:
        The assistant's reply text. Returns a fixed message when the model
        is unavailable or produced nothing, and an "An error occurred..."
        string when generation raises.
    """
    if generator is None:
        return "Model not available. Please check server logs."

    try:
        turns = list(chat_history or [])
        # The chat endpoint appends the current user message to the history
        # before calling this function; drop that trailing copy so the model
        # does not see the same prompt twice.
        if turns and turns[-1].get("role") == "user" and turns[-1].get("content") == prompt:
            turns.pop()

        # Prepare conversation history for the model
        speaker_labels = {"user": "User", "assistant": "Assistant"}
        rendered_turns = [
            f"{speaker_labels[message.get('role', '')]}: {message.get('content', '')}\n"
            for message in turns
            if message.get("role", "") in speaker_labels
        ]

        # Add current prompt
        formatted_prompt = "".join(rendered_turns) + f"User: {prompt}\nAssistant:"

        # Generate response.
        # max_new_tokens bounds only the reply; the previous max_length=1024
        # also counted the prompt, so a long history left no room to answer.
        # return_full_text=False returns only the continuation.
        response = generator(
            formatted_prompt,
            max_new_tokens=512,
            num_return_sequences=1,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            return_full_text=False,
        )

        # Extract and clean the response: keep text up to the first invented
        # "User:" turn, so a hallucinated follow-up exchange is not returned.
        generated_text = response[0]['generated_text']
        assistant_response = generated_text.split("\nUser:", 1)[0].strip()

        # Handle potential empty responses
        if not assistant_response:
            assistant_response = "I apologize, but I couldn't generate a response. Please try rephrasing your question."

        return assistant_response

    except Exception as e:
        logger.error(f"Error generating response: {str(e)}")
        return f"An error occurred while generating a response: {str(e)}"
329
+
330
def _trim_history(chat_session):
    """Keep the two seed messages plus the ten most recent messages."""
    history = chat_session["history"]
    if len(history) > 12:  # Initial system messages + 10 user/assistant exchanges
        chat_session["history"] = history[:2] + history[-10:]


@app.route('/api/chat', methods=['POST'])
def chat():
    """Main endpoint for chat functionality.

    Expects a JSON object with:
        session_id: required, identifies the conversation.
        message: the user's text (string, default "").
        has_files / files: optional list of
            {"file_name", "file_data" (base64), "file_type"} entries.
        opportunities_data: optional profile dict used for opportunity matching.

    Returns:
        JSON with "response", "suggestions" and "opportunities". Returns 400
        for a malformed request and 500 with an "error" field on failure.
    """
    try:
        # Parse request data. silent=True yields None rather than raising on
        # a missing or invalid JSON body, so the client gets a 400, not a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'error': 'Request body must be a JSON object'}), 400

        session_id = data.get('session_id')
        user_message = data.get('message', '')
        has_files = data.get('has_files', False)
        files = data.get('files', [])
        opportunities_data = data.get('opportunities_data', {})

        if not session_id:
            return jsonify({'error': 'Session ID is required'}), 400
        if not isinstance(user_message, str):
            return jsonify({'error': 'Message must be a string'}), 400

        logger.info(f"Received request for session {session_id}, has_files: {has_files}")

        # Get chat session
        chat_session = get_or_create_chat_session(session_id)

        # Analyze sentiment (logged only; it does not influence the reply)
        sentiment = analyze_sentiment(user_message)
        logger.info(f"Message sentiment: {sentiment}")

        # Check for gender bias
        bias_warning = detect_gender_bias(user_message)
        if bias_warning:
            logger.info("Gender bias detected")
            response_text = f"I noticed some gender bias in your message. {bias_warning}\n\nLet's continue the conversation inclusively! 🌟"

            # Add messages to history
            chat_session["history"].append({"role": "user", "content": user_message})
            chat_session["history"].append({"role": "assistant", "content": response_text})
            # Trim here as well so repeated flagged messages cannot grow the
            # history without bound.
            _trim_history(chat_session)

            return jsonify({
                'response': response_text,
                'suggestions': generate_suggestions(response_text),
                'opportunities': []
            })

        # Process files if present
        file_contents = []
        if has_files and isinstance(files, list):
            for file_info in files:
                if not isinstance(file_info, dict):
                    logger.warning("Skipping malformed file entry")
                    continue

                file_name = file_info.get('file_name')
                file_data = file_info.get('file_data')
                file_type = file_info.get('file_type')

                if not isinstance(file_name, str) or not file_name or not file_data:
                    continue

                # The file type is lowercased downstream, so it must be a
                # string; derive it from the file name when it is missing.
                if not isinstance(file_type, str) or not file_type:
                    file_type = os.path.splitext(file_name)[1].lstrip('.')

                # Save file to disk
                # NOTE(review): saved uploads are never removed and share one
                # folder across sessions; confirm whether that is intended.
                file_path = save_base64_file(file_data, file_name, file_type)
                if not file_path:
                    logger.error(f"Failed to save file: {file_name}")
                    continue

                # Process file based on type
                file_content = process_file(file_path, file_type)
                if file_content.startswith(("Error", "Processing for")):
                    logger.warning(f"Issue processing file: {file_content}")
                    continue

                file_contents.append(f"Content from {file_name}: {file_content[:5000]}")  # Limit to 5000 chars per file
                logger.info(f"Successfully processed file: {file_name}")

        # Construct complete message with both user text and file contents
        complete_message = user_message
        if file_contents:
            complete_message += "\n\nAttached files content:\n" + "\n\n".join(file_contents)

        # Add user message to history
        chat_session["history"].append({"role": "user", "content": complete_message})

        # Generate response with HF model
        try:
            response_text = generate_response_with_hf(complete_message, chat_session["history"])

            # Add assistant response to history
            chat_session["history"].append({"role": "assistant", "content": response_text})

            # Keep history at a reasonable size (last 10 messages)
            _trim_history(chat_session)

            # Generate suggestions based on response
            suggestions = generate_suggestions(response_text)

            # Generate opportunities based on user message and profile
            opportunities = generate_opportunities(complete_message, opportunities_data)

            return jsonify({
                'response': response_text,
                'suggestions': suggestions,
                'opportunities': opportunities
            })

        except Exception as e:
            logger.error(f"Error generating response: {str(e)}")
            return jsonify({
                'error': f"Error generating response: {str(e)}",
                'suggestions': ["Could you try rephrasing your question?", "Let's try a different topic"],
                'opportunities': []
            }), 500

    except Exception as e:
        logger.error(f"Error processing request: {str(e)}")
        return jsonify({'error': str(e)}), 500
437
+
438
@app.route('/api/health', methods=['GET'])
def health_check():
    """Health check endpoint.

    Reports a static "ok" status together with the service name and the
    configured model identifier.
    """
    payload = {
        'status': 'ok',
        'service': 'Ashabot API',
        'model': HF_MODEL,
    }
    return jsonify(payload)
446
+
447
@app.route('/', methods=['GET'])
def index():
    """Root endpoint with API documentation.

    Returns service metadata and a short description of each route.
    """
    endpoint_help = {
        '/api/chat': 'POST - Send messages and files for processing',
        '/api/health': 'GET - Health check',
    }
    service_info = {
        'service': 'Ashabot API',
        'version': '1.0.0',
        'model': HF_MODEL,
        'endpoints': endpoint_help,
        'documentation': 'See README.md for full API documentation',
    }
    return jsonify(service_info)
460
+
461
if __name__ == '__main__':
    # Listen on all interfaces; the port comes from $PORT, defaulting to 5000.
    listen_port = int(os.environ.get('PORT', 5000))
    app.run(
        host='0.0.0.0',
        port=listen_port,
        debug=False,  # Set debug=False for production
    )