import streamlit as st import google.generativeai as genai from PyPDF2 import PdfReader import os import re import json import pickle import hashlib from datetime import datetime from pathlib import Path from langchain_text_splitters import RecursiveCharacterTextSplitter from langchain_huggingface.embeddings import HuggingFaceEmbeddings from langchain_community.vectorstores import FAISS from langchain.schema import Document import tempfile import warnings import numpy as np import shutil import time warnings.filterwarnings('ignore') # Configure page st.set_page_config( page_title="Ashok 2.0 - AI Problem Solving Assistant", page_icon="🧠", layout="centered", initial_sidebar_state="collapsed" ) # World-class minimal UI styling st.markdown(""" """, unsafe_allow_html=True) class PersistentHFKnowledgeBase: """Persistent Knowledge Base with silent initialization""" def __init__(self): # Create persistent directories self.data_dir = Path("./persistent_data") self.data_dir.mkdir(exist_ok=True) # File paths for persistence self.vectorstore_path = self.data_dir / "vectorstore" self.metadata_path = self.data_dir / "metadata.json" self.conversations_path = self.data_dir / "conversations.json" self.stats_path = self.data_dir / "stats.json" self.init_flag_path = self.data_dir / "initialized.flag" # Initialize components self.embeddings = None self.vectorstore = None self.metadata = {} self.conversations = [] self.stats = {} # Initialize system silently self.initialize_system() def initialize_system(self): """Initialize the complete system with silent book processing""" try: # Initialize embeddings first self.init_embeddings() # Check if system was already initialized if self.init_flag_path.exists(): # Load existing knowledge base silently self.load_existing_knowledge() else: # First time initialization - do it silently self.first_time_initialization() except Exception as e: # Silent fallback initialization self.fallback_initialization() def init_embeddings(self): """Initialize embeddings model with silent caching""" if self.embeddings is None: try: cache_dir = self.data_dir / "embeddings_cache" cache_dir.mkdir(exist_ok=True) self.embeddings = HuggingFaceEmbeddings( model_name="sentence-transformers/all-MiniLM-L6-v2", cache_folder=str(cache_dir) ) except Exception as e: return False return True def first_time_initialization(self): """Complete first-time setup with silent book processing""" try: # Initialize metadata self.metadata = { 'version': '2.0-minimal', 'created_at': datetime.now().isoformat(), 'last_updated': datetime.now().isoformat(), 'total_documents': 0, 'book_processed': False, 'book_info': {}, 'initialization_complete': False } # Initialize stats self.stats = { 'total_queries': 0, 'learning_sessions': 0, 'book_chunks': 0, 'conversation_chunks': 0, 'silly_questions_blocked': 0 } # Initialize conversations self.conversations = [] # Process book if available (silently) book_processed = self.process_startup_book() # Create default knowledge if no book if not book_processed: self.create_default_knowledge() # Mark as initialized self.metadata['initialization_complete'] = True self.save_all_data() # Create initialization flag with open(self.init_flag_path, 'w') as f: f.write(f"Initialized on {datetime.now().isoformat()}") except Exception as e: self.fallback_initialization() def process_startup_book(self): """Process the book included with the deployment (silently)""" book_paths = [ "book.pdf", "problem_solving_book.pdf", "default_book.pdf", "ashok_book.pdf" ] for book_path in book_paths: if Path(book_path).exists(): success = self.process_book_file(Path(book_path)) if success: return True return False def process_book_file(self, book_path): """Process a specific book file (silently)""" try: # Extract text from PDF reader = PdfReader(str(book_path)) page_texts = [] for page_num, page in enumerate(reader.pages, 1): page_text = page.extract_text() if page_text.strip(): page_texts.append({ 'page': page_num, 'text': page_text, 'word_count': len(page_text.split()) }) if not page_texts: return False # Create book info book_info = { 'title': book_path.name, 'path': str(book_path), 'pages': len(page_texts), 'processed_at': datetime.now().isoformat(), 'source': 'deployment_book' } # Process content success, message = self.process_book_content("", page_texts, book_info) return success except Exception as e: return False def create_default_knowledge(self): """Create comprehensive default knowledge base""" default_knowledge = [ { "content": "Problem-solving methodology: 1) Problem Definition - Clearly articulate what needs to be solved, 2) Information Gathering - Collect relevant data and context, 3) Root Cause Analysis - Identify underlying causes, not just symptoms, 4) Solution Generation - Brainstorm multiple potential solutions, 5) Solution Evaluation - Assess feasibility, impact, and resources, 6) Implementation Planning - Create detailed action steps, 7) Execution and Monitoring - Implement and track progress, 8) Review and Learning - Evaluate outcomes and extract lessons.", "metadata": { "source": "core_knowledge", "type": "framework", "topic": "problem_solving_process", "chapter": "Core Problem-Solving Framework" } }, { "content": "Decision-making best practices: Use the DECIDE framework - D: Define the problem clearly, E: Establish criteria for solutions, C: Consider alternatives systematically, I: Identify best alternatives using criteria, D: Develop and implement action plan, E: Evaluate and monitor solution effectiveness. Always consider stakeholder impact, resource constraints, time limitations, and potential risks.", "metadata": { "source": "core_knowledge", "type": "framework", "topic": "decision_making", "chapter": "Decision-Making Framework" } }, { "content": "Conflict resolution strategies: 1) Active Listening - Understand all perspectives without judgment, 2) Identify Interests - Focus on underlying needs, not stated positions, 3) Find Common Ground - Identify shared goals and values, 4) Generate Options - Create win-win solutions collaboratively, 5) Use Objective Criteria - Apply fair standards for evaluation, 6) Separate People from Problems - Address issues, not personalities, 7) Maintain Relationships - Preserve working relationships while solving problems.", "metadata": { "source": "core_knowledge", "type": "strategy", "topic": "conflict_resolution", "chapter": "Conflict Resolution Techniques" } }, { "content": "Critical thinking skills development: Analysis (breaking complex information into components), Evaluation (assessing credibility and logical strength), Inference (drawing reasonable conclusions), Interpretation (understanding meaning and significance), Explanation (articulating reasoning clearly), Self-regulation (monitoring and correcting one's thinking). Practice questioning assumptions, considering multiple perspectives, examining evidence quality, and recognizing logical fallacies.", "metadata": { "source": "core_knowledge", "type": "skills", "topic": "critical_thinking", "chapter": "Critical Thinking Development" } }, { "content": "Team problem-solving dynamics: Establish psychological safety for open communication, define roles and responsibilities clearly, use structured problem-solving processes, encourage diverse perspectives, facilitate effective meetings, manage conflicts constructively, ensure equal participation, document decisions and action items, follow up on commitments, celebrate successes and learn from failures.", "metadata": { "source": "core_knowledge", "type": "team_dynamics", "topic": "team_problem_solving", "chapter": "Team Collaboration for Problem Solving" } } ] # Create documents documents = [] for item in default_knowledge: doc = Document( page_content=item["content"], metadata=item["metadata"] ) documents.append(doc) # Create vectorstore if documents and self.embeddings: self.vectorstore = FAISS.from_documents(documents, self.embeddings) self.stats['book_chunks'] = len(documents) self.metadata['total_documents'] = len(documents) self.metadata['book_processed'] = True self.metadata['book_info'] = { 'title': 'Core Problem-Solving Knowledge', 'type': 'built_in', 'chunks': len(documents) } return True return False def load_existing_knowledge(self): """Load existing knowledge base from persistent storage""" try: # Load metadata if self.metadata_path.exists(): with open(self.metadata_path, 'r') as f: self.metadata = json.load(f) # Load stats if self.stats_path.exists(): with open(self.stats_path, 'r') as f: self.stats = json.load(f) # Load conversations if self.conversations_path.exists(): with open(self.conversations_path, 'r') as f: self.conversations = json.load(f) # Load vectorstore if self.vectorstore_path.exists() and self.embeddings: self.vectorstore = FAISS.load_local( str(self.vectorstore_path), self.embeddings, allow_dangerous_deserialization=True ) return True except Exception as e: return False return False def save_all_data(self): """Save all knowledge base data to persistent storage""" try: # Save metadata self.metadata['last_updated'] = datetime.now().isoformat() with open(self.metadata_path, 'w') as f: json.dump(self.metadata, f, indent=2) # Save stats with open(self.stats_path, 'w') as f: json.dump(self.stats, f, indent=2) # Save conversations with open(self.conversations_path, 'w') as f: json.dump(self.conversations, f, indent=2) # Save vectorstore if self.vectorstore: self.vectorstore.save_local(str(self.vectorstore_path)) return True except Exception as e: return False def fallback_initialization(self): """Fallback initialization if main process fails""" self.create_default_knowledge() self.metadata = {'fallback': True, 'created_at': datetime.now().isoformat()} self.stats = {'total_queries': 0, 'learning_sessions': 0, 'book_chunks': 0, 'conversation_chunks': 0, 'silly_questions_blocked': 0} self.conversations = [] def process_book_content(self, text, page_texts, book_info): """Process book content and add to knowledge base""" try: # Text splitter text_splitter = RecursiveCharacterTextSplitter( chunk_size=800, chunk_overlap=150, length_function=len, separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""] ) # Create documents documents = [] for page_info in page_texts: page_text = page_info['text'] chapter_title = self._extract_chapter_title(page_text.split('\n')) doc = Document( page_content=page_text, metadata={ "source": "book", "type": "book_content", "page": page_info['page'], "chapter": chapter_title, "word_count": page_info['word_count'], "book_title": book_info.get('title', 'Problem Solving Book'), "processed_at": datetime.now().isoformat() } ) documents.append(doc) # Split into chunks chunks = text_splitter.split_documents(documents) # Add to vectorstore if self.vectorstore is None: self.vectorstore = FAISS.from_documents(chunks, self.embeddings) else: new_vectorstore = FAISS.from_documents(chunks, self.embeddings) self.vectorstore.merge_from(new_vectorstore) # Update metadata self.metadata['book_processed'] = True self.metadata['book_info'] = book_info self.metadata['total_documents'] += len(chunks) self.stats['book_chunks'] += len(chunks) return True, f"Successfully processed {len(chunks)} chunks from {len(page_texts)} pages!" except Exception as e: return False, f"Error processing book: {str(e)}" def add_conversation_to_knowledge(self, question, answer): """Add conversation to persistent knowledge base (auto-save)""" if len(question.strip()) < 10 or len(answer.strip()) < 20: return False try: conversation_text = f"Question: {question}\n\nAnswer: {answer}" doc = Document( page_content=conversation_text, metadata={ "source": "learned_conversation", "type": "qa_pair", "question": question, "answer_preview": answer[:200] + "..." if len(answer) > 200 else answer, "conversation_id": hashlib.md5(conversation_text.encode()).hexdigest()[:8], "added_at": datetime.now().isoformat(), "quality_score": self._calculate_quality_score(question, answer) } ) # Add to vectorstore if self.vectorstore is None: self.vectorstore = FAISS.from_documents([doc], self.embeddings) else: new_vectorstore = FAISS.from_documents([doc], self.embeddings) self.vectorstore.merge_from(new_vectorstore) # Store conversation self.conversations.append({ 'question': question, 'answer': answer, 'timestamp': datetime.now().isoformat(), 'learned': True }) # Update stats self.stats['conversation_chunks'] += 1 self.stats['learning_sessions'] += 1 self.metadata['total_documents'] += 1 # Auto-save to persistent storage self.save_all_data() return True except Exception as e: return False def search_knowledge_base(self, query, k=5): """Search the persistent knowledge base""" if self.vectorstore is None: return [] try: self.stats['total_queries'] += 1 docs = self.vectorstore.similarity_search_with_score(query, k=k) results = [] for doc, score in docs: result = { 'content': doc.page_content, 'source': doc.metadata.get('source', 'unknown'), 'type': doc.metadata.get('type', 'unknown'), 'page': doc.metadata.get('page', 'N/A'), 'chapter': doc.metadata.get('chapter', 'Unknown Section'), 'similarity_score': float(score), 'metadata': doc.metadata } results.append(result) return results except Exception as e: return [] def _extract_chapter_title(self, lines): """Extract chapter title from text lines""" for line in lines[:10]: line = line.strip() if line and len(line) < 100: if re.match(r'(chapter|section|part|unit)\s+\d+', line.lower()): return line if line.isupper() or line.istitle(): return line return "General Content" def _calculate_quality_score(self, question, answer): """Calculate conversation quality score""" score = 0 # Question quality if len(question.split()) >= 5: score += 1 if any(word in question.lower() for word in ['how', 'what', 'why', 'strategy', 'problem']): score += 1 if '?' in question: score += 1 # Answer quality if len(answer.split()) >= 20: score += 1 if any(word in answer.lower() for word in ['approach', 'solution', 'method', 'step']): score += 1 return min(score, 5) class AshokMinimalChatbot: def __init__(self): self.knowledge_base = PersistentHFKnowledgeBase() def is_silly_question(self, question): """Detect silly or irrelevant questions""" question_lower = question.lower().strip() if len(question_lower) < 3: return True # Greeting patterns greeting_patterns = [ r'\b(hello|hi|hey|salam|namaste|adab)\b', r'\b(good morning|evening|afternoon)\b', r'\b(how are you|kaise ho|kya hal)\b', r'\b(what.*your name|who are you)\b' ] # Silly keywords silly_keywords = [ 'stupid', 'dumb', 'joke', 'funny', 'lol', 'weather', 'movie', 'song', 'game', 'gossip', 'love', 'dating', 'facebook', 'instagram', 'politics', 'religion', 'age', 'appearance' ] # Problem-solving keywords good_keywords = [ 'problem', 'solve', 'solution', 'strategy', 'approach', 'method', 'challenge', 'decision', 'plan', 'analyze', 'conflict', 'team', 'work', 'project', 'manage' ] # Check patterns for pattern in greeting_patterns: if re.search(pattern, question_lower): return True # Score keywords good_score = sum(1 for keyword in good_keywords if keyword in question_lower) if good_score >= 2: return False silly_score = sum(1 for keyword in silly_keywords if keyword in question_lower) if silly_score >= 1: return True # Check structure question_words = ['what', 'how', 'why', 'when', 'where', 'can', 'should'] has_question_word = any(word in question_lower.split() for word in question_words) word_count = len(question_lower.split()) if word_count < 4 and not has_question_word: return True return False def generate_response(self, question, api_key): """Generate response using Gemini""" try: # Check for silly questions if self.is_silly_question(question): self.knowledge_base.stats['silly_questions_blocked'] += 1 silly_responses = [ "Don't waste your time. Ask me something related to problem solving yaar!", "I'm here to help with problem solving, not for chit-chat. Be serious!", "Focus on real problems that need solving, samjha? Ask about strategies and approaches!", "This is a problem-solving platform. Ask me about challenges, decisions, ya conflict resolution!", "Tumhara dimagh kahan hai? Ask meaningful questions about problem-solving techniques." ] import random return random.choice(silly_responses), False # Configure Gemini genai.configure(api_key=api_key) model = genai.GenerativeModel('gemini-2.0-flash') # Search knowledge base relevant_results = self.knowledge_base.search_knowledge_base(question, k=5) # Build context context = "" references = [] if relevant_results: context = "=== RELEVANT KNOWLEDGE ===\n\n" for i, result in enumerate(relevant_results, 1): source_type = result['type'] if source_type == 'book_content': context += f"**Reference {i}** (Chapter: {result['chapter']}, Page: {result['page']}):\n" elif source_type == 'qa_pair': context += f"**Learning {i}** (From past conversations):\n" else: context += f"**Framework {i}** (Core knowledge):\n" context += f"{result['content']}\n\n" references.append(result) # Create prompt prompt = f""" You are Ashok, a problem-solving expert with Pakistani/Indian conversational style. Your characteristics: 1. Mix English with Urdu naturally: "acha", "bilkul", "samjha", "dekho" 2. Enthusiastic responses: "Excellent question bache!" or "Bahut acha sawal!" 3. Reference knowledge sources when available 4. Provide practical, actionable advice 5. Encouraging and professional tone {context} User Question: {question} Provide a comprehensive, practical response using your characteristic style. Reference the knowledge sources when relevant and give actionable steps. """ response = model.generate_content(prompt) final_response = response.text # Add clean references section if references: final_response += "\n\n**Knowledge Sources:**\n" for ref in references: if ref['type'] == 'book_content': final_response += f"• Book: {ref['chapter']} (Page {ref['page']})\n" elif ref['type'] == 'qa_pair': final_response += f"• Previous Learning\n" else: final_response += f"• Core Framework: {ref['metadata'].get('topic', 'Problem-solving')}\n" return final_response, True except Exception as e: return f"Sorry bache, I encountered an error: {str(e)}. Please check your API key and try again!", False def show_typing_indicator(): """Show typing indicator""" st.markdown("""