# Hugging Face Spaces page chrome ("Spaces: Sleeping") captured during
# extraction; not part of the server code.
| """ | |
| Krish Mind Local Server (GGUF) | |
| ============================== | |
| Works with index_local.html | |
| Features: GGUF Model + Web Search + Image Generation + RAG + File Upload | |
| """ | |
| import os | |
| import sys | |
| import urllib.parse | |
| import pickle | |
| import tempfile | |
| from datetime import datetime | |
print("=" * 60)
print("π§ Krish Mind Local Server (GGUF)")
print("=" * 60)

# --- Core dependencies ---
# Hard requirements: the server aborts startup (sys.exit) with an install
# hint when the GGUF runtime or the web framework is missing.
try:
    from llama_cpp import Llama
    print("β llama-cpp-python")
except ImportError:
    print("β Run: pip install llama-cpp-python")
    sys.exit(1)
try:
    from fastapi import FastAPI, UploadFile, File
    from fastapi.middleware.cors import CORSMiddleware
    from pydantic import BaseModel
    import uvicorn
    print("β fastapi + uvicorn")
except ImportError:
    print("β Run: pip install fastapi uvicorn python-multipart")
    sys.exit(1)

# --- Config ---
GGUF_PATH = "d:/Krish Mind/gguf/krish-mind-standalone-Q4.gguf"  # local GGUF weights (absolute Windows path)
EMBEDDINGS_FILE = "../data/krce_embeddings.pkl"  # pre-computed RAG embeddings, relative to the working dir
DATA_FILE = "../data/krce_college_data.jsonl"    # raw knowledge-base facts, one JSON object per line

# --- Load GGUF Model ---
print(f"\nβ³ Loading GGUF model...")
try:
    model = Llama(
        model_path=GGUF_PATH,
        n_ctx=4096,       # context window in tokens
        n_gpu_layers=0,   # 0 = CPU-only inference
        verbose=False
    )
    print("β Model loaded!")
except Exception as e:
    # Any load failure (missing file, bad GGUF) is fatal for the server.
    print(f"β Model error: {e}")
    sys.exit(1)
# --- DuckDuckGo Web Search ---
# Optional feature: if duckduckgo_search is missing or fails to initialise,
# ddgs stays None and web search is simply disabled at request time.
print("\nπ¦ Loading optional features...")
ddgs = None
try:
    import warnings
    warnings.filterwarnings("ignore")  # silence deprecation noise from the search lib
    from duckduckgo_search import DDGS
    ddgs = DDGS()
    print("β DuckDuckGo web search")
except Exception as e:
    print(f"β οΈ Web search disabled: {e}")
# --- RAG SETUP (Load Pre-computed Embeddings) ---
print("π Loading Knowledge Base...")
knowledge_base = []    # list of fact records; each has 'instruction' and 'output' keys
doc_embeddings = None  # embedding matrix, one row per fact (None = RAG disabled)
rag_model = None       # SentenceTransformer used to encode queries at request time

# Try to load pre-computed embeddings first
if os.path.exists(EMBEDDINGS_FILE):
    try:
        import numpy as np
        from sentence_transformers import SentenceTransformer
        print(f"π Loading pre-computed embeddings from {EMBEDDINGS_FILE}...")
        with open(EMBEDDINGS_FILE, 'rb') as f:
            # NOTE(review): pickle is only safe for trusted local files —
            # never point EMBEDDINGS_FILE at untrusted data.
            data = pickle.load(f)
        knowledge_base = data['knowledge_base']
        doc_embeddings = data['embeddings']
        # Load the model for query encoding (needed for search)
        rag_model = SentenceTransformer(data.get('model_name', 'all-MiniLM-L6-v2'))
        print(f"β Embeddings loaded! ({len(knowledge_base)} facts)")
    except Exception as e:
        print(f"β οΈ Could not load embeddings: {e}")
        print(" Falling back to live embedding...")

# Fallback: compute embeddings if pkl not found
if doc_embeddings is None and os.path.exists(DATA_FILE):
    try:
        from sentence_transformers import SentenceTransformer
        import numpy as np
        import json
        rag_model = SentenceTransformer('all-MiniLM-L6-v2')
        print("β Embedding model loaded (SentenceTransformer)")
        with open(DATA_FILE, 'r') as f:
            for line in f:
                if line.strip():
                    try:
                        knowledge_base.append(json.loads(line))
                    except:
                        # malformed JSONL lines are skipped silently
                        pass
        if knowledge_base:
            docs = [f"{k['instruction']} {k['output']}" for k in knowledge_base]
            doc_embeddings = rag_model.encode(docs)
            print(f"β Embeddings computed! ({len(knowledge_base)} facts)")
            print(" β οΈ Run 'python scripts/build_embeddings.py' for faster startup!")
    except Exception as e:
        print(f"β RAG disabled: {e}")
        rag_model = None
else:
    # Neither pre-computed embeddings nor the raw data file were usable.
    if doc_embeddings is None:
        print("β οΈ Data file not found! RAG disabled.")

# Initialize Cross-Encoder for re-ranking
# Optional: search_krce falls back to raw cosine scores when unavailable.
cross_encoder = None
try:
    from sentence_transformers import CrossEncoder
    cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
    print("β Cross-Encoder loaded for re-ranking")
except Exception as e:
    print(f"β οΈ Cross-Encoder not available: {e}")

# ============================================
# FILE UPLOAD RAG: Session-based document analysis
# ============================================
# Store uploaded file embeddings per session (in-memory)
# Maps session_id -> {"filename", "chunks", "embeddings", "full_text"}.
session_file_data = {}
def extract_text_from_file(file_path: str, filename: str) -> str:
    """Extract plain text from an uploaded file.

    Dispatches on the filename extension: .txt/.md/.json/.csv are read as
    text, .pdf via PyPDF2 (falling back to PyMuPDF), .doc/.docx via
    python-docx.  Any other extension is attempted as plain text.

    Returns:
        The stripped text, or a string starting with "Error" when the file
        could not be read (callers check for that prefix).
    """
    text = ""
    # Extension without the dot; a name with no dot yields the whole name,
    # which simply falls through to the plain-text branch below.
    ext = filename.lower().split('.')[-1]
    try:
        if ext == 'txt':
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                text = f.read()
        elif ext == 'pdf':
            try:
                import PyPDF2
                with open(file_path, 'rb') as f:
                    reader = PyPDF2.PdfReader(f)
                    for page in reader.pages:
                        # Fix: extract_text() can return None for image-only
                        # pages, which previously raised TypeError here.
                        text += (page.extract_text() or "") + "\n"
            except ImportError:
                try:
                    import fitz  # PyMuPDF
                    doc = fitz.open(file_path)
                    for page in doc:
                        text += page.get_text() + "\n"
                    doc.close()
                except ImportError:
                    return "Error: Install PyPDF2 or PyMuPDF to read PDFs"
        elif ext in ['doc', 'docx']:
            try:
                import docx
                doc = docx.Document(file_path)
                text = "\n".join([para.text for para in doc.paragraphs])
            except ImportError:
                return "Error: Install python-docx to read Word files"
        elif ext in ['md', 'json', 'csv']:
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                text = f.read()
        else:
            # Try reading as plain text
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                text = f.read()
    except Exception as e:
        return f"Error reading file: {e}"
    return text.strip()
def chunk_text(text: str, chunk_size: int = 500, overlap: int = 50) -> list:
    """Split *text* into overlapping word-window chunks for embedding.

    Each chunk holds up to ``chunk_size`` whitespace-separated words and
    consecutive chunks share ``overlap`` words of context.

    Returns:
        A list of chunk strings; empty for empty/whitespace-only input.

    Raises:
        ValueError: if ``chunk_size`` is not positive or ``overlap`` is not
            in ``[0, chunk_size)`` — such arguments would otherwise silently
            produce no chunks (negative range step) or raise an obscure
            range() error (step 0).
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")
    if overlap < 0 or overlap >= chunk_size:
        raise ValueError("overlap must be in [0, chunk_size)")
    words = text.split()
    step = chunk_size - overlap  # window stride, hoisted out of the loop
    chunks = []
    for i in range(0, len(words), step):
        chunk = " ".join(words[i:i + chunk_size])
        if chunk.strip():
            chunks.append(chunk)
    return chunks
| # ============================================ | |
| # ADVANCED RAG: Query Expansion + Hybrid Search | |
| # ============================================ | |
# Domain abbreviations -> expansion text appended to the query so that the
# embedding search also matches documents that use the long forms.
ABBREVIATIONS = {
    "aids": "AI&DS Artificial Intelligence and Data Science",
    "ai&ds": "AI&DS Artificial Intelligence and Data Science",
    "aid": "AI&DS Artificial Intelligence and Data Science",
    "aiml": "AI&ML Artificial Intelligence and Machine Learning",
    "ai&ml": "AI&ML Artificial Intelligence and Machine Learning",
    "cse": "Computer Science Engineering CSE",
    "ece": "Electronics Communication Engineering ECE",
    "eee": "Electrical Electronics Engineering EEE",
    "mech": "Mechanical Engineering",
    "it": "Information Technology IT",
    "hod": "Head of Department HOD",
    "mam": "madam professor female faculty",
    "sir": "male professor faculty",
    "staffs": "staff faculty members",
    "sase": "Sasikumar Sasidevi",
    "krce": "K. Ramakrishnan College of Engineering",
}

def expand_query(query):
    """Expand abbreviations and add synonyms for better matching.

    Lowercases the query and, for every abbreviation that appears as a
    whole word, appends its expansion text.  The token set is computed once
    from the original query (instead of re-splitting the growing string on
    every iteration), so earlier expansions cannot cascade into matches for
    later abbreviations.
    """
    expanded = query.lower()
    tokens = set(expanded.split())  # whole-word matching, computed once
    for abbr, full in ABBREVIATIONS.items():
        if abbr in tokens:
            expanded = expanded + " " + full
    return expanded
def search_krce(query, threshold=0.25):
    """Advanced RAG: Query Expansion + Vector Search + Cross-Encoder Re-ranking"""
    # Returns a "\n\n"-joined string of matching knowledge-base 'output'
    # fields, or "" when RAG is disabled, nothing matches, or any error
    # occurs (errors are printed and swallowed so chat keeps working).
    if not rag_model or doc_embeddings is None:
        return ""
    try:
        expanded_query = expand_query(query)
        print(f"\nπ RAG Search")
        print(f" Original: '{query}'")
        print(f" Expanded: '{expanded_query}'")
        print("-" * 50)
        from sklearn.metrics.pairwise import cosine_similarity
        q_emb = rag_model.encode([expanded_query])
        vector_scores = cosine_similarity(q_emb, doc_embeddings).flatten()
        # Top-10 candidate facts by cosine similarity, best first.
        top_indices = vector_scores.argsort()[-10:][::-1]
        top_candidates = [(idx, vector_scores[idx]) for idx in top_indices]
        print("Vector Search Results:")
        for i, (idx, v) in enumerate(top_candidates[:5]):
            instruction = knowledge_base[idx]['instruction'][:35]
            print(f" #{i+1} V:{v:.3f} | {instruction}...")
        if cross_encoder:
            # Re-rank candidates with the (query, document) cross-encoder;
            # re-ranking uses the ORIGINAL query, not the expanded one.
            pairs = [[query, f"{knowledge_base[idx]['instruction']} {knowledge_base[idx]['output']}"]
                     for idx, _ in top_candidates]
            ce_scores = cross_encoder.predict(pairs)
            final_ranking = sorted(zip(top_candidates, ce_scores), key=lambda x: x[1], reverse=True)
            print("Cross-Encoder Re-ranking:")
            for i, ((idx, v), ce) in enumerate(final_ranking[:3]):
                instruction = knowledge_base[idx]['instruction'][:35]
                print(f" #{i+1} CE:{ce:.3f} | {instruction}...")
            print("-" * 50)
            final_context = []
            print(f"β RAG Retrieval (Top 5):")
            for i, ((idx, v), ce) in enumerate(final_ranking[:5]):
                if ce > -6.0:  # permissive cross-encoder logit cut-off
                    content = knowledge_base[idx]['output']
                    final_context.append(content)
                    print(f" Took #{i+1}: {knowledge_base[idx]['instruction'][:30]}...")
            if final_context:
                return "\n\n".join(final_context)
        else:
            # No cross-encoder available: fall back to raw cosine scores
            # gated by *threshold*.
            # NOTE(review): indentation reconstructed from a whitespace-
            # mangled source — confirm this `else` pairs with
            # `if cross_encoder:` in the original (the only reading that
            # avoids a NameError on final_ranking).
            final_context = []
            for i, (idx, score) in enumerate(top_candidates[:5]):
                if score > threshold:
                    final_context.append(knowledge_base[idx]['output'])
            if final_context:
                return "\n\n".join(final_context)
        print("β No confident match found")
        return ""
    except Exception as e:
        print(f"RAG Error: {e}")
        return ""
def search_file_context(query: str, session_id: str) -> str:
    """Search the session's uploaded file for context relevant to *query*.

    General "summarize the file"-style queries get the leading chunks up to
    a size cap; specific queries get the top-3 semantically closest chunks.

    Returns:
        A labelled context string, or "" when no file is stored for the
        session, the embedding model is unavailable, or any error occurs.
    """
    if session_id not in session_file_data or not rag_model:
        print(f"β οΈ File context not found: session_id={session_id}, in_session={session_id in session_file_data}")
        return ""
    # Maximum characters to return (prevent context overflow)
    MAX_CONTEXT_CHARS = 2000
    try:
        file_data = session_file_data[session_id]
        chunks = file_data['chunks']
        embeddings = file_data['embeddings']
        # Fix: the stored filename was loaded but never used — the context
        # labels below now reference it instead of a literal placeholder.
        filename = file_data.get('filename', 'uploaded file')
        # Detect general queries about the file (summarize, what's in the file, etc.)
        general_triggers = ['summarize', 'summary', 'what is in', "what's in", 'tell me about',
                            'describe', 'overview', 'main points', 'key points', 'the file',
                            'the document', 'uploaded', 'attached', 'read the']
        is_general_query = any(t in query.lower() for t in general_triggers)
        if is_general_query:
            # For general queries, return leading content up to the limit
            print(f"π General file query detected - returning limited content")
            total_available = sum(len(c) for c in chunks)  # hoisted: computed once
            all_content = ""
            for chunk in chunks:
                if len(all_content) + len(chunk) > MAX_CONTEXT_CHARS:
                    break
                all_content += chunk + "\n\n"
            if len(all_content) < total_available:
                all_content += f"\n[...content truncated, showing {len(all_content)} of {total_available} chars]"
            return f"[Content from {filename}]:\n\n{all_content.strip()}"
        # For specific queries, use semantic search
        from sklearn.metrics.pairwise import cosine_similarity
        q_emb = rag_model.encode([query])
        scores = cosine_similarity(q_emb, embeddings).flatten()
        # Get top 3 most relevant chunks
        top_indices = scores.argsort()[-3:][::-1]
        context_parts = []
        total_chars = 0
        for idx in top_indices:
            if scores[idx] > 0.15 and total_chars < MAX_CONTEXT_CHARS:
                chunk = chunks[idx]
                if total_chars + len(chunk) > MAX_CONTEXT_CHARS:
                    # Truncate to fit
                    remaining = MAX_CONTEXT_CHARS - total_chars
                    chunk = chunk[:remaining] + "..."
                context_parts.append(chunk)
                total_chars += len(chunk)
        if context_parts:
            print(f"π File context found ({len(context_parts)} chunks, {total_chars} chars)")
            return f"[Content from {filename}]:\n\n" + "\n\n".join(context_parts)
        # Fallback: if no good matches, return first chunk truncated
        print(f"π Low confidence match - returning truncated first chunk")
        first_chunk = chunks[0][:MAX_CONTEXT_CHARS] if chunks else ""
        return f"[Content from {filename}]:\n\n{first_chunk}"
    except Exception as e:
        print(f"File search error: {e}")
        return ""
def search_web(query):
    """Return up to 3 DuckDuckGo text results as markdown, or "".

    Degrades gracefully: returns "" when web search is disabled, when there
    are no results, or when the search call fails.
    """
    if not ddgs:
        # Web search is optional; ddgs is None when the library is missing.
        return ""
    try:
        results = ddgs.text(query, max_results=3)
        if not results:
            return ""
        return "\n\n".join([f"**{r['title']}**\n{r['body']}" for r in results])
    except Exception as e:
        # Fix: was a bare `except:` that also swallowed SystemExit /
        # KeyboardInterrupt and hid failures silently.  Network and
        # rate-limit errors still degrade to "no web context".
        print(f"β οΈ Web search failed: {e}")
        return ""
# --- FastAPI ---
app = FastAPI()
# CORS wide open so the local index_local.html (file:// or any origin) can
# call the API; acceptable for a local tool, not for public deployment.
app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
class ChatRequest(BaseModel):
    """Request body for the chat endpoint."""
    message: str             # the user's message text
    max_tokens: int = 512    # generation cap passed to the model
    temperature: float = 0.7
    summary: str = ""        # rolling summary of older conversation turns
    history: list = []       # recent turns: [{"role": ..., "content": ...}]
    session_id: str = ""     # For file upload sessions
class SummarizeRequest(BaseModel):
    """Request body for the summarize endpoint."""
    messages: list  # messages to condense: [{"role": ..., "content": ...}]
# NOTE(review): a route decorator (e.g. @app.get("/")) appears to be missing
# here — confirm against the original source; without one FastAPI never
# exposes this handler.
async def root():
    """Health/status probe reporting which optional features are active."""
    return {"name": "Krish Mind", "status": "online", "rag": rag_model is not None, "web": ddgs is not None}
# NOTE(review): a route decorator (e.g. @app.post("/upload")) appears to be
# missing here — confirm against the original source.
async def upload_file(file: UploadFile = File(...), session_id: str = "default"):
    """Upload a file for RAG analysis"""
    # Saves the upload to a temp file, extracts its text, chunks and embeds
    # it, then stores everything under *session_id* for later chat requests.
    # Returns {"success": bool, ...}; never raises to the client.
    if not rag_model:
        return {"success": False, "error": "Embedding model not available"}
    try:
        # Save uploaded file temporarily
        suffix = '.' + file.filename.split('.')[-1] if '.' in file.filename else '.txt'
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            content = await file.read()
            tmp.write(content)
            tmp_path = tmp.name
        # Extract text
        print(f"π€ Processing uploaded file: {file.filename}")
        text = extract_text_from_file(tmp_path, file.filename)
        # Clean up temp file
        os.unlink(tmp_path)
        # extract_text_from_file signals failure with an "Error..." prefix.
        if text.startswith("Error"):
            return {"success": False, "error": text}
        if not text.strip():
            return {"success": False, "error": "Could not extract text from file"}
        # Chunk text
        chunks = chunk_text(text)
        if not chunks:
            return {"success": False, "error": "File too small or empty"}
        # Create embeddings
        print(f"π Creating embeddings for {len(chunks)} chunks...")
        embeddings = rag_model.encode(chunks)
        # Store in session
        session_file_data[session_id] = {
            "filename": file.filename,
            "chunks": chunks,
            "embeddings": embeddings,
            "full_text": text[:2000]  # First 2000 chars for context
        }
        print(f"β File processed: {len(chunks)} chunks, {len(text)} chars")
        return {
            "success": True,
            "filename": file.filename,
            "chunks": len(chunks),
            "chars": len(text),
            "preview": text[:200] + "..." if len(text) > 200 else text
        }
    except Exception as e:
        print(f"β Upload error: {e}")
        return {"success": False, "error": str(e)}
async def clear_file(session_id: str):
    """Drop the uploaded-file data stored for *session_id*, if any."""
    try:
        # EAFP: one dict operation instead of a membership test plus delete.
        del session_file_data[session_id]
    except KeyError:
        return {"success": False, "message": "No file found for session"}
    return {"success": True, "message": "File cleared"}
# NOTE(review): a route decorator (e.g. @app.post("/summarize")) appears to
# be missing here — confirm against the original source.
async def summarize(request: SummarizeRequest):
    """Summarize older messages to compress context"""
    # Flattens the messages into "Role: content" lines and asks the local
    # model for a short summary; on failure returns an empty summary plus
    # the error string so the client can degrade gracefully.
    try:
        messages_text = ""
        for msg in request.messages:
            role = msg.get("role", "user")
            content = msg.get("content", "")
            messages_text += f"{role.capitalize()}: {content}\n"
        # Llama-3 chat template; low temperature for stable summaries.
        summary_prompt = f"""<|start_header_id|>system<|end_header_id|>
You are a conversation summarizer. Condense the following conversation into a brief summary (2-3 sentences max) that captures the key topics and context. Focus on what was discussed, not exact words.<|eot_id|><|start_header_id|>user<|end_header_id|>
Summarize this conversation:
{messages_text}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
Summary: """
        output = model(summary_prompt, max_tokens=150, temperature=0.3, stop=["<|eot_id|>"], echo=False)
        summary = output["choices"][0]["text"].strip()
        print(f"π Summarized {len(request.messages)} messages: {summary[:50]}...")
        return {"summary": summary}
    except Exception as e:
        print(f"β Summarization error: {e}")
        return {"summary": "", "error": str(e)}
# NOTE(review): a route decorator (e.g. @app.post("/chat")) appears to be
# missing here — confirm against the original source.
async def chat(request: ChatRequest):
    """Main chat endpoint.

    Gathers optional context (college knowledge base via RAG, the session's
    uploaded document, and web search), builds a Llama-3 chat-template
    prompt with the recent history, and generates a reply.  Image requests
    short-circuit to a pollinations.ai image URL.
    """
    user_input = request.message
    session_id = request.session_id or "default"
    # Image generation
    img_triggers = ["generate image", "create image", "draw", "imagine"]
    if any(t in user_input.lower() for t in img_triggers):
        prompt = user_input
        for t in img_triggers:
            prompt = prompt.lower().replace(t, "")
        prompt = prompt.strip()
        if prompt:
            url = f"https://image.pollinations.ai/prompt/{urllib.parse.quote(prompt)}"
            # Fix: embed the generated URL in the reply — `url` was built
            # but never used, so the response contained no image.
            return {"response": f"Here's your image of **{prompt}**:\n\n![{prompt}]({url})"}
    # RAG Search (college knowledge base)
    rag_context = ""
    if rag_model:
        rag_context = search_krce(user_input)
        if rag_context:
            print(f"π§ RAG Context found: {rag_context[:50]}...")
    # File context (uploaded document)
    file_context = ""
    if session_id in session_file_data:
        file_context = search_file_context(user_input, session_id)
        if file_context:
            print(f"π File Context found: {file_context[:50]}...")
    # Web search — only for question-like inputs longer than two words.
    web_context = ""
    search_triggers = ["search", "find", "latest", "news", "who is", "what is", "when", "where", "how"]
    if ddgs and any(t in user_input.lower() for t in search_triggers):
        if len(user_input.split()) > 2:
            print(f"π Searching web...")
            web_context = search_web(user_input)
    # Build prompt
    now = datetime.now().strftime("%A, %B %d, %Y %I:%M %p")
    sys_prompt = f"""You are Krish Mind, a helpful AI assistant created by Krish CS. Current time: {now}
IMPORTANT STRICT RULES:
1. IDENTITY: You were created by Krish CS. Do NOT claim to be created by anyone mentioned in the context (like faculty, HODs, or staff). If the context mentions a name, that person is a subject of the data, NOT your creator.
2. CONTEXT USAGE: Use the provided context to answer questions. If the context contains a list (e.g., faculty names), make sure to include ALL items found in the context chunks.
3. FORMATTING: Use Markdown. For letters, use **bold** for headers (e.g., **Subject:**) and use DOUBLE LINE BREAKS between sections (Place, Date, From, To, Subject, Body) to create clear distinct paragraphs.
4. AMBIGUITY: 'AID' or 'AIDS' in this context ALWAYS refers to 'Artificial Intelligence and Data Science', NEVER 'Aerospace' or 'Disease'.
5. ACCURACY: If the context contains a name like 'Mrs. C. Rani', she is a faculty member. Do NOT say "I was created by Mrs. C. Rani".
"""
    if file_context:
        sys_prompt += f"\n\nUploaded Document Context:\n{file_context}"
    if rag_context:
        sys_prompt += f"\n\nKnowledge Base Context:\n{rag_context}"
    if web_context:
        sys_prompt += f"\n\nWeb Results:\n{web_context}"
    if request.summary:
        sys_prompt += f"\n\nPrevious conversation summary:\n{request.summary}"
    # Build history context (last 6 turns, Llama-3 chat-template form)
    history_context = ""
    if request.history:
        for msg in request.history[-6:]:
            role = msg.get("role", "user")
            content = msg.get("content", "")
            if role == "user":
                history_context += f"<|start_header_id|>user<|end_header_id|>\n\n{content}<|eot_id|>"
            else:
                history_context += f"<|start_header_id|>assistant<|end_header_id|>\n\n{content}<|eot_id|>"
    # Build full prompt
    if history_context:
        full_prompt = f"""<|start_header_id|>system<|end_header_id|>
{sys_prompt}<|eot_id|>{history_context}<|start_header_id|>user<|end_header_id|>
{user_input}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""
    else:
        full_prompt = f"""<|start_header_id|>system<|end_header_id|>
{sys_prompt}<|eot_id|><|start_header_id|>user<|end_header_id|>
{user_input}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""
    try:
        print(f"π¬ Generating response...")
        output = model(full_prompt, max_tokens=request.max_tokens, temperature=request.temperature, stop=["<|eot_id|>"], echo=False)
        response = output["choices"][0]["text"].strip()
        print(f"β Done")
        return {"response": response}
    except Exception as e:
        return {"response": f"Error: {e}"}
if __name__ == "__main__":
    # Binds on all interfaces (reachable from the LAN); the printed URL uses
    # loopback for the local browser.
    print("\n" + "=" * 60)
    print("π Server running at: http://127.0.0.1:8000")
    print("π± Open index_local.html in your browser")
    print("=" * 60 + "\n")
    uvicorn.run(app, host="0.0.0.0", port=8000)