Spaces:

Rajesh0279
/

Teraflops

Sleeping

App Files Files Community

Teraflops / src /streamlit_app.py

Rajesh0279

Update src/streamlit_app.py

6221602 verified 6 months ago

raw

history blame contribute delete

42.7 kB

	#!/usr/bin/env python3
	"""
	LLM Compatibility Advisor - Streamlined with Download Sizes
	Author: Assistant
	Description: Provides device-based LLM recommendations with popular models and download sizes
	Requirements: streamlit, pandas, numpy, plotly, openpyxl, torch, transformers, huggingface_hub
	"""
	import os
	import streamlit as st
	from run2 import run_app2
	import pandas as pd
	import numpy as np
	import re
	import plotly.express as px
	import plotly.graph_objects as go
	import torch
	from typing import Optional, Tuple, List, Dict
	from run3 import estimate_training_time_and_cost,get_gpu_teraflops,get_gpu_cost_per_tflop_hour
	from utils import get_all_models_from_database
	from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
	from huggingface_hub import login

	# Hugging Face access token. Read it from the environment so a real secret
	# never has to be committed to the repository; the original placeholder is
	# kept as the fallback value when the variable is not set.
	HUGGINGFACE_TOKEN = os.environ.get("HUGGINGFACE_TOKEN", "your_huggingface_token_here")

	# NOTE(review): these module-level objects are rebound further down by
	# `tokenizer, model = load_llama3_pipeline()` before they are used, and this
	# load runs before any `login()` call — confirm whether it can be removed.
	tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B-Instruct")
	model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B-Instruct")

	@st.cache_resource
	def load_model():
	    """Log in to the Hugging Face Hub and build a text-generation pipeline.

	    The result is cached with ``st.cache_resource`` (the same approach used by
	    ``load_llama3_pipeline``) so the model is not reloaded on every Streamlit
	    rerun.

	    Returns:
	        A transformers "text-generation" pipeline wrapping the
	        ``meta-llama/Llama-3.1-8B-Instruct`` tokenizer and model.
	    """
	    # `pipeline` is not part of the module-level transformers import list,
	    # so bring it into scope here; without this the call below is a NameError.
	    from transformers import pipeline

	    login(token=HUGGINGFACE_TOKEN)

	    model_id = "meta-llama/Llama-3.1-8B-Instruct"
	    tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=True)
	    model = AutoModelForCausalLM.from_pretrained(model_id, use_auth_token=True)
	    return pipeline("text-generation", model=model, tokenizer=tokenizer)

	pipe = load_model()

	# --- STREAMLIT UI ---

	st.title("💬 LLaMA 3.1 Chatbot")

	# This chat stores {"user": ..., "bot": ...} entries. It uses its own session
	# key because the second chat UI in this file keeps {"role", "content"} dicts
	# under "chat_history"; sharing one key makes each display loop raise KeyError
	# on the other's entries.
	if "pipeline_chat_history" not in st.session_state:
	    st.session_state.pipeline_chat_history = []

	# Chat display
	for msg in st.session_state.pipeline_chat_history:
	    st.markdown(f"You: {msg['user']}")
	    st.markdown(f"Bot: {msg['bot']}")

	# User input
	user_input = st.text_input("Type your message:")

	if st.button("Send") and user_input:
	    with st.spinner("Generating response..."):
	        response = pipe(
	            user_input,
	            max_new_tokens=200,
	            do_sample=True,
	            temperature=0.7,
	            top_p=0.95,
	            num_return_sequences=1,
	        )[0]["generated_text"]

	    # Post-process: the generated text starts with the prompt, so cut it off
	    bot_reply = response[len(user_input):].strip()

	    # Save to history
	    st.session_state.pipeline_chat_history.append({
	        "user": user_input,
	        "bot": bot_reply
	    })

	    # Rerun so the history loop above renders the new exchange.
	    # st.rerun() is the call already used by the language selector in this file.
	    st.rerun()

	# Language configuration
	# Maps each supported language code to the name shown in the language selector.
	# Insertion order (en, te, hi) is the order the selector lists them in.
	LANGUAGES = dict(
	    en='English',
	    te='తెలుగు',
	    hi='हिंदी',
	)

	# Translation dictionaries
	# One table per language; every table carries the same set of message keys.
	_TEXT_EN = dict(
	    title='LLM Compatibility Advisor',
	    select_language='Select Language',
	    dataset_analysis='Dataset Analysis',
	    manual_spec_entry='Manual Spec Entry',
	    training_estimator='LLM Training Time Estimator',
	    individual_analysis='Individual Student Analysis',
	    choose_student='Choose a student:',
	    laptop_config='Laptop Configuration',
	    mobile_config='Mobile Configuration',
	    performance_tier='Performance Tier',
	    recommendation='Recommendation',
	    notes='Notes',
	    batch_analysis='Batch Analysis & Insights',
	    student_recommendations='Student Recommendations',
	    ram_distribution='RAM Distribution Analysis',
	    performance_summary='Performance Tier Summary',
	    model_explorer='Popular Model Explorer',
	    select_ram_range='Select RAM range to explore models:',
	    select_category='Select model category:',
	    download_size='Download Size',
	    available_on='Available on',
	    general_purpose='General Purpose',
	    code_specialists='Code Specialists',
	    chat_optimized='Chat Optimized',
	    reasoning_masters='Reasoning Masters',
	    multimodal_models='Multimodal Models',
	    recommended_models='Recommended Models for',
	)

	_TEXT_TE = dict(
	    title='LLM అనుకూలత సలహాదారు',
	    select_language='భాష ఎంచుకోండి',
	    dataset_analysis='డేటాసెట్ విశ్లేషణ',
	    manual_spec_entry='మాన్యువల్ స్పెక్ ఎంట్రీ',
	    training_estimator='LLM శిక్షణ సమయం అంచనా',
	    individual_analysis='వ్యక్తిగత విద్యార్థి విశ్లేషణ',
	    choose_student='విద్యార్థిని ఎంచుకోండి:',
	    laptop_config='ల్యాప్‌టాప్ కాన్ఫిగరేషన్',
	    mobile_config='మొబైల్ కాన్ఫిగరేషన్',
	    performance_tier='పనితీరు శ్రేణి',
	    recommendation='సిఫార్సు',
	    notes='గమనికలు',
	    batch_analysis='బ్యాచ్ విశ్లేషణ మరియు అంతర్దృష్టులు',
	    student_recommendations='విద్యార్థి సిఫార్సులు',
	    ram_distribution='RAM పంపిణీ విశ్లేషణ',
	    performance_summary='పనితీరు శ్రేణి సారాంశం',
	    model_explorer='జనాదరణ పొందిన మోడల్ ఎక్స్‌ప్లోరర్',
	    select_ram_range='మోడల్‌లను అన్వేషించడానికి RAM పరిధిని ఎంచుకోండి:',
	    select_category='మోడల్ వర్గాన్ని ఎంచుకోండి:',
	    download_size='డౌన్‌లోడ్ పరిమాణం',
	    available_on='అందుబాటులో',
	    general_purpose='సాధారణ ప్రయోజనం',
	    code_specialists='కోడ్ నిపుణులు',
	    chat_optimized='చాట్ అనుకూలీకరించబడింది',
	    reasoning_masters='తర్క నిపుణులు',
	    multimodal_models='మల్టీమోడల్ మోడల్స్',
	    recommended_models='సిఫార్సు చేసిన మోడల్స్',
	)

	_TEXT_HI = dict(
	    title='LLM संगतता सलाहकार',
	    select_language='भाषा चुनें',
	    dataset_analysis='डेटासेट विश्लेषण',
	    manual_spec_entry='मैनुअल स्पेक एंट्री',
	    training_estimator='LLM प्रशिक्षण समय अनुमानक',
	    individual_analysis='व्यक्तिगत छात्र विश्लेषण',
	    choose_student='छात्र चुनें:',
	    laptop_config='लैपटॉप कॉन्फ़िगरेशन',
	    mobile_config='मोबाइल कॉन्फ़िगरेशन',
	    performance_tier='प्रदर्शन स्तर',
	    recommendation='सिफारिश',
	    notes='नोट्स',
	    batch_analysis='बैच विश्लेषण और अंतर्दृष्टि',
	    student_recommendations='छात्र सिफारिशें',
	    ram_distribution='RAM वितरण विश्लेषण',
	    performance_summary='प्रदर्शन स्तर सारांश',
	    model_explorer='लोकप्रिय मॉडल एक्सप्लोरर',
	    select_ram_range='मॉडल एक्सप्लोर करने के लिए RAM रेंज चुनें:',
	    select_category='मॉडल श्रेणी चुनें:',
	    download_size='डाउनलोड आकार',
	    available_on='उपलब्ध है',
	    general_purpose='सामान्य प्रयोजन',
	    code_specialists='कोड विशेषज्ञ',
	    chat_optimized='चैट अनुकूलित',
	    reasoning_masters='तर्क विशेषज्ञ',
	    multimodal_models='मल्टीमॉडल मॉडल्स',
	    recommended_models='अनुशंसित मॉडल',
	)

	# Language code -> {message key -> localized UI text}
	TRANSLATIONS = {
	    'en': _TEXT_EN,
	    'te': _TEXT_TE,
	    'hi': _TEXT_HI,
	}

	@st.cache_resource
	def load_llama3_pipeline():
	    """Load the Llama 3.1 8B Instruct tokenizer and model.

	    Decorated with ``st.cache_resource``. On a CUDA machine the model is
	    requested in float16 with ``device_map="auto"``; otherwise float32 with no
	    device map.

	    Returns:
	        A ``(tokenizer, model)`` tuple (despite the name, not a pipeline object).
	    """
	    checkpoint = "meta-llama/Llama-3.1-8B-Instruct"
	    llama_tokenizer = AutoTokenizer.from_pretrained(checkpoint)

	    if torch.cuda.is_available():
	        weights_dtype, placement = torch.float16, "auto"
	    else:
	        weights_dtype, placement = torch.float32, None

	    llama_model = AutoModelForCausalLM.from_pretrained(
	        checkpoint,
	        torch_dtype=weights_dtype,
	        device_map=placement,
	    )
	    return llama_tokenizer, llama_model

	tokenizer, model = load_llama3_pipeline()

	st.title("🧠 Chat with Llama 3.1 8B (Instruct)")

	# Conversation is kept as a list of {"role", "content"} dicts, seeded with
	# the system prompt, which is the shape apply_chat_template() consumes.
	if 'chat_history' not in st.session_state:
	    st.session_state.chat_history = [
	        {"role": "system", "content": "You are a helpful, concise assistant."}
	    ]

	user_input = st.text_input("You:", key="user_input")

	# NOTE(review): the text input keeps its value across reruns, so any rerun
	# with a non-empty box appends and regenerates the same message again —
	# confirm whether a submit button / form is wanted here.
	if user_input:
	    st.session_state.chat_history.append({"role": "user", "content": user_input})

	    # Format messages into prompt
	    messages = st.session_state.chat_history
	    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

	    # The templated text already carries its special tokens; do not let the
	    # tokenizer prepend another set when encoding it.
	    inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False).to(model.device)

	    with st.spinner("Llama 3 is thinking..."):
	        output = model.generate(
	            **inputs,
	            max_new_tokens=512,
	            temperature=0.7,
	            do_sample=True,
	            top_p=0.9,
	            pad_token_id=tokenizer.eos_token_id
	        )

	    # Decode only the newly generated tokens. Splitting the decoded text on
	    # `prompt` cannot work: skip_special_tokens=True removes the template's
	    # special tokens, so the prompt string never appears in the decoded text.
	    prompt_length = inputs["input_ids"].shape[-1]
	    response = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True).strip()

	    st.session_state.chat_history.append({"role": "assistant", "content": response})

	# Display conversation (system messages are intentionally not shown)
	for msg in st.session_state.chat_history:
	    if msg["role"] == "user":
	        st.markdown(f"You: {msg['content']}")
	    elif msg["role"] == "assistant":
	        st.markdown(f"AI: {msg['content']}")

	def get_text(key, lang='en'):
	    """Get translated text for given key and language.

	    Lookup order: the requested language's table, then the English table,
	    then the key itself. Unknown language codes use the English table.

	    Args:
	        key: Message key into the TRANSLATIONS tables.
	        lang: Language code; defaults to 'en'.

	    Returns:
	        The localized string, or ``key`` unchanged when no table defines it.
	    """
	    english = TRANSLATIONS['en']
	    localized = TRANSLATIONS.get(lang, english)
	    if key in localized:
	        return localized[key]
	    # A key missing from a partially translated language should still show
	    # readable English text rather than the raw key.
	    return english.get(key, key)

	def init_session_state():
	    """Seed Streamlit session state with defaults on first run.

	    Only ``language`` is initialised here (to 'en'); an existing value is
	    left untouched.
	    """
	    if 'language' in st.session_state:
	        return
	    st.session_state.language = 'en'
	# END OF ADDITION

	# Page-level configuration, then session defaults.
	# NOTE(review): the original comment says this must be the first Streamlit
	# command, but st.title()/st.text_input() calls appear earlier in this file —
	# confirm the intended ordering.
	page_settings = dict(
	    page_title="LLM Compatibility Advisor",
	    layout="wide",
	    page_icon="",
	    initial_sidebar_state="expanded",
	)
	st.set_page_config(**page_settings)

	init_session_state()

	# Enhanced data loading with error handling
	def run_app1():

	@st.cache_data
	def load_data():
	    """Load and combine the student spreadsheets.

	    Reads the "Form Responses 1" sheet of each workbook, strips whitespace
	    from the column names and stacks the rows.

	    Returns:
	        ``(dataframe, None)`` on success, or ``(None, message)`` as soon as a
	        file is missing or unreadable, or when the combined data is empty.
	    """
	    paths = [
	        "src/BITS_INTERNS.xlsx",
	        "src/ICFAI.xlsx"
	    ]

	    # Collect the frames and concatenate once, instead of growing a frame by
	    # repeated concat onto an initially empty DataFrame.
	    frames = []
	    for path in paths:
	        try:
	            df = pd.read_excel(path, sheet_name="Form Responses 1")
	            df.columns = df.columns.str.strip()
	        except FileNotFoundError:
	            return None, f"Excel file '{path}' not found. Please upload the file."
	        except Exception as e:
	            return None, f"Error loading '{path}': {str(e)}"
	        frames.append(df)

	    combined_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

	    if combined_df.empty:
	        return None, "No data found in Excel files."
	    return combined_df, None

	# RAM parsing helper
	def extract_numeric_ram(ram) -> Optional[int]:
	    """Parse a free-form RAM description into whole gigabytes.

	    Matching is case-insensitive and tried in this order:
	      1. a number followed by "gb"/"g" (decimals are truncated),
	      2. a number followed by "mb"/"m" (converted to GB, minimum 1),
	      3. the first bare integer (assumed to already be GB).

	    Args:
	        ram: Cell value such as "8GB", "16 GB", "8192MB" or 12; may be NaN/None.

	    Returns:
	        The RAM size in GB as an int, or None when the value is missing or
	        contains no number.
	    """
	    if pd.isna(ram):
	        return None

	    # Keep the spaces and allow optional whitespace before the unit instead:
	    # deleting every space fuses neighbouring tokens ("DDR4 8GB" -> "ddr48gb").
	    ram_str = str(ram).lower()

	    # Handle various formats: "8GB", "8 GB", "8gb", "7.6 GB"
	    gb_match = re.search(r"(\d+(?:\.\d+)?)\s*(?:gb|g)", ram_str)
	    if gb_match:
	        return int(float(gb_match.group(1)))

	    # Handle MB format, e.g. "8192MB"
	    mb_match = re.search(r"(\d+)\s*(?:mb|m)", ram_str)
	    if mb_match:
	        return max(1, int(mb_match.group(1)) // 1024)  # Convert MB to GB

	    # Handle plain numbers (assume GB)
	    plain_match = re.search(r"(\d+)", ram_str)
	    if plain_match:
	        return int(plain_match.group(1))

	    return None

	# RAM-tier based LLM recommendation with localized texts
	def recommend_llm(ram_str, lang='en') -> Tuple[str, str, str, Dict[str, List[Dict]]]:
	    """Returns (recommendation, performance_tier, additional_info, detailed_models)

	    The RAM value is parsed with extract_numeric_ram() and mapped to a tier:
	    <=2 Ultra Low, <=4 Low, <=6 Moderate-Low, <=8 Moderate, <=16 Good,
	    <=32 High, otherwise Ultra High. Unparseable RAM yields the "Unknown"
	    tier with an empty model dict.

	    Args:
	        ram_str: Free-form RAM description (e.g. "8GB"); may be NaN.
	        lang: Language code for the recommendation and info texts. Codes
	            without a table fall back to English, matching get_text().

	    Returns:
	        (recommendation text, tier label, info text, LLM_DATABASE entry for
	        the tier). The tier label is always English.
	    """
	    ram = extract_numeric_ram(ram_str)

	    # Localized recommendations
	    recommendations = {
	        'en': {
	            'ultra_low': "🔸 Ultra-lightweight models - basic NLP tasks",
	            'low': "🔸 Small language models - decent capabilities",
	            'moderate_low': "🟠 Mid-range models - good general performance",
	            'moderate': "🟠 Strong 7B models - excellent capabilities",
	            'good': "🟢 High-quality models - premium performance",
	            'high': "🔵 Premium models - professional grade",
	            'ultra_high': "🔵 Top-tier models - enterprise capabilities",
	            'unknown': "⚪ Check exact specs or test with quantized models."
	        },
	        'te': {
	            'ultra_low': "🔸 అల్ట్రా-లైట్‌వెయిట్ మోడల్స్ - ప్రాథమిక NLP పనులు",
	            'low': "🔸 చిన్న భాష మోడల్స్ - మంచి సామర్థ్యాలు",
	            'moderate_low': "🟠 మధ్య-శ్రేణి మోడల్స్ - మంచి సాధారణ పనితీరు",
	            'moderate': "🟠 బలమైన 7B మోడల్స్ - అద్భుతమైన సామర్థ్యాలు",
	            'good': "🟢 అధిక-నాణ్యత మోడల్స్ - ప్రీమియం పనితీరు",
	            'high': "🔵 ప్రీమియం మోడల్స్ - వృత్తిపరమైన గ్రేడ్",
	            'ultra_high': "🔵 టాప్-టైర్ మోడల్స్ - ఎంటర్‌ప్రైజ్ సామర్థ్యాలు",
	            'unknown': "⚪ ఖచ్చితమైన స్పెక్స్ చెక్ చేయండి లేదా క్వాంటైజ్డ్ మోడల్స్‌తో టెస్ట్ చేయండి."
	        },
	        'hi': {
	            'ultra_low': "🔸 अल्ट्रा-लाइटवेट मॉडल - बुनियादी NLP कार्य",
	            'low': "🔸 छोटे भाषा मॉडल - अच्छी क्षमताएं",
	            'moderate_low': "🟠 मध्यम-श्रेणी मॉडल - अच्छा सामान्य प्रदर्शन",
	            'moderate': "🟠 मजबूत 7B मॉडल - उत्कृष्ट क्षमताएं",
	            'good': "🟢 उच्च-गुणवत्ता मॉडल - प्रीमियम प्रदर्शन",
	            'high': "🔵 प्रीमियम मॉडल - व्यावसायिक ग्रेड",
	            'ultra_high': "🔵 टॉप-टियर मॉडल - एंटरप्राइज़ क्षमताएं",
	            'unknown': "⚪ सटीक स्पेक्स जांचें या क्वांटाइज़्ड मॉडल के साथ परीक्षण करें।"
	        }
	    }

	    info_text = {
	        'en': {
	            'ultra_low': "Mobile-optimized, simple tasks, limited context",
	            'low': "Basic chat, simple reasoning, text classification",
	            'moderate_low': "Solid reasoning, coding help, longer conversations",
	            'moderate': "Professional use, coding assistance, complex reasoning",
	            'good': "Advanced tasks, multimodal support, research use",
	            'high': "Enterprise ready, complex reasoning, specialized tasks",
	            'ultra_high': "Research grade, maximum performance, domain expertise",
	            'unknown': "Verify RAM specifications"
	        },
	        'te': {
	            'ultra_low': "మొబైల్-అనుకూలీకరించబడిన, సాధారణ పనులు, పరిమిత సందర్భం",
	            'low': "ప్రాథమిక చాట్, సాధారణ తర్కం, టెక్స్ట్ వర్గీకరణ",
	            'moderate_low': "దృఢమైన తర్కం, కోడింగ్ సహాయం, పొడవైన సంభాషణలు",
	            'moderate': "వృత్తిపరమైన ఉపయోగం, కోడింగ్ సహాయం, సంక్లిష్ట తర్కం",
	            'good': "అధునాతన పనులు, మల్టీమోడల్ మద్దతు, పరిశోధన ఉపయోగం",
	            'high': "ఎంటర్‌ప్రైజ్ సిద్ధం, సంక్లిష్ట తర్కం, ప్రత్యేక పనులు",
	            'ultra_high': "పరిశోధనా గ్రేడ్, గరిష్ట పనితీరు, డొమైన్ నైపుణ్యం",
	            'unknown': "RAM స్పెసిఫికేషన్లను ధృవీకరించండి"
	        },
	        'hi': {
	            'ultra_low': "मोबाइल-अनुकूलित, सरल कार्य, सीमित संदर्भ",
	            'low': "बुनियादी चैट, सरल तर्क, टेक्स्ट वर्गीकरण",
	            'moderate_low': "ठोस तर्क, कोडिंग सहायता, लंबी बातचीत",
	            'moderate': "व्यावसायिक उपयोग, कोडिंग सहायता, जटिल तर्क",
	            'good': "उन्नत कार्य, मल्टीमॉडल समर्थन, अनुसंधान उपयोग",
	            'high': "एंटरप्राइज़ तैयार, जटिल तर्क, विशेष कार्य",
	            'ultra_high': "अनुसंधान ग्रेड, अधिकतम प्रदर्शन, डोमेन विशेषज्ञता",
	            'unknown': "RAM विनिर्देशों को सत्यापित करें"
	        }
	    }

	    # Unsupported language codes fall back to English instead of raising KeyError
	    rec_for_lang = recommendations.get(lang, recommendations['en'])
	    info_for_lang = info_text.get(lang, info_text['en'])

	    if ram is None:
	        return (rec_for_lang['unknown'],
	                "Unknown",
	                info_for_lang['unknown'],
	                {})

	    # (inclusive upper RAM bound in GB, database/text key, tier label),
	    # ordered from smallest to largest; anything above the last bound is Ultra High.
	    tier_table = (
	        (2, 'ultra_low', "Ultra Low"),
	        (4, 'low', "Low"),
	        (6, 'moderate_low', "Moderate-Low"),
	        (8, 'moderate', "Moderate"),
	        (16, 'good', "Good"),
	        (32, 'high', "High"),
	    )
	    tier_key, tier_label = 'ultra_high', "Ultra High"
	    for max_ram_gb, key, label in tier_table:
	        if ram <= max_ram_gb:
	            tier_key, tier_label = key, label
	            break

	    # NOTE(review): LLM_DATABASE is not defined or imported in the visible part
	    # of this file — confirm where it is expected to come from.
	    models = LLM_DATABASE[tier_key]
	    return (rec_for_lang[tier_key],
	            tier_label,
	            info_for_lang[tier_key],
	            models)

	# OS name -> display icon
	def get_os_info(os_name) -> Tuple[str, str]:
	    """Returns (icon, clean_name)

	    The icon is chosen by case-insensitive substring match, checked in this
	    order: windows, mac/darwin, linux/ubuntu, android, ios. Anything else gets
	    the generic laptop icon. A missing value yields ("💻", "Not specified");
	    otherwise the second element is ``os_name`` exactly as passed in.
	    """
	    if pd.isna(os_name):
	        return "💻", "Not specified"

	    lowered = str(os_name).lower()

	    # First matching rule wins, so order matters ("macos" must hit the
	    # mac rule before the "ios" rule is consulted).
	    icon_rules = (
	        (("windows",), "🪟"),
	        (("mac", "darwin"), "🍎"),
	        (("linux", "ubuntu"), "🐧"),
	        (("android",), "🤖"),
	        (("ios",), "📱"),
	    )
	    for keywords, icon in icon_rules:
	        if any(word in lowered for word in keywords):
	            return icon, os_name

	    return "💻", os_name

	# RAM histogram for the batch view
	def create_performance_chart(df):
	    """Build an overlaid histogram of laptop and mobile RAM.

	    Each of the "Laptop RAM" and "Mobile RAM" columns is parsed with
	    extract_numeric_ram(); unparseable entries are dropped.

	    Args:
	        df: DataFrame containing "Laptop RAM" and "Mobile RAM" columns.

	    Returns:
	        A plotly ``go.Figure`` with one histogram trace per column.
	    """
	    ram_series = [
	        (column, df[column].apply(extract_numeric_ram).dropna())
	        for column in ("Laptop RAM", "Mobile RAM")
	    ]

	    fig = go.Figure()
	    for trace_name, values in ram_series:
	        histogram = go.Histogram(x=values, name=trace_name, opacity=0.7, nbinsx=10)
	        fig.add_trace(histogram)

	    layout_options = dict(
	        title="RAM Distribution Across Devices",
	        xaxis_title="RAM (GB)",
	        yaxis_title="Number of Students",
	        barmode='overlay',
	        height=400,
	    )
	    fig.update_layout(**layout_options)

	    return fig

	# Model details display

	def display_model_categories(models_dict: Dict[str, List[Dict]], ram_gb: int, lang='en'):
	    """Display models organized by category with download sizes

	    Renders one expander per non-empty category, listing at most the first
	    eight models with their name, size, description and A100 cost.

	    Args:
	        models_dict: Category name -> list of model dicts. Each model dict is
	            read for 'name', 'size', 'description' and 'cost(A100)'.
	        ram_gb: RAM figure shown in the section heading.
	        lang: Language code for the heading and category labels; codes
	            without a label table fall back to English.
	    """
	    if not models_dict:
	        return

	    st.markdown(f"### 🎯 {get_text('recommended_models', lang)} {ram_gb}GB RAM:")

	    category_names = {
	        'en': {'general': 'General', 'code': 'Code', 'chat': 'Chat', 'reasoning': 'Reasoning', 'multimodal': 'Multimodal'},
	        'te': {'general': 'సాధారణ', 'code': 'కోడ్', 'chat': 'చాట్', 'reasoning': 'తర్కం', 'multimodal': 'మల్టీమోడల్'},
	        'hi': {'general': 'सामान्य', 'code': 'कोड', 'chat': 'चैट', 'reasoning': 'तर्क', 'multimodal': 'मल्टीमॉडल'}
	    }
	    # Same fallback rule as get_text(): unknown language -> English labels
	    labels = category_names.get(lang, category_names['en'])

	    for category, model_list in models_dict.items():
	        if not model_list:
	            continue

	        category_display = labels.get(category, category.title())
	        with st.expander(f"📂 {category_display} Models"):
	            for model in model_list[:8]:  # Limit to top 8 per category
	                col1, col2, col3, col4 = st.columns([3, 1, 2, 4])
	                with col1:
	                    st.markdown(f"{model['name']}")
	                with col2:
	                    st.markdown(f"`{model['size']}`")
	                with col3:
	                    st.markdown(f"{model['description']}")
	                with col4:
	                    # NOTE(review): other parts of this file read cost under
	                    # "cost_a100"-style keys; tolerate a missing 'cost(A100)'
	                    # rather than crash the page — confirm the real schema.
	                    st.markdown(f"{model.get('cost(A100)', '')}")
	# Fallback dataset used when the Excel files cannot be loaded
	def generate_demo_data():
	    """Return a small in-memory student table for running without real data.

	    Returns:
	        A DataFrame of eight demo students with the columns "Full Name",
	        "Laptop RAM", "Mobile RAM", "Laptop Operating System" and
	        "Mobile Operating System".
	    """
	    student_names = [f"Demo Student {number}" for number in range(1, 9)]
	    laptop_ram = ["8GB", "16GB", "4GB", "32GB", "6GB", "12GB", "2GB", "24GB"]
	    mobile_ram = ["4GB", "8GB", "3GB", "12GB", "6GB", "4GB", "2GB", "8GB"]
	    laptop_os = [
	        "Windows 11", "macOS Monterey", "Ubuntu 22.04", "Windows 10",
	        "macOS Big Sur", "Fedora 36", "Windows 11", "macOS Ventura",
	    ]
	    mobile_os = [
	        "Android 13", "iOS 16", "Android 12", "iOS 15",
	        "Android 14", "iOS 17", "Android 11", "iOS 16",
	    ]

	    # Column order here is the column order of the resulting frame
	    return pd.DataFrame({
	        "Full Name": student_names,
	        "Laptop RAM": laptop_ram,
	        "Mobile RAM": mobile_ram,
	        "Laptop Operating System": laptop_os,
	        "Mobile Operating System": mobile_os,
	    })

	# Student selectbox options
	def prepare_user_options(df):
	    """Safely prepare user options for selectbox, handling NaN values and mixed types

	    Names are taken from the "Full Name" column, converted to strings and
	    stripped. Empty strings and the literal "nan" are discarded, and names
	    that become identical after stripping are listed once.

	    Args:
	        df: DataFrame with a "Full Name" column.

	    Returns:
	        ["Select a student..."] followed by the sorted unique names. If the
	        names cannot be prepared, the error is shown with st.error and only
	        the placeholder entry is returned.
	    """
	    placeholder = "Select a student..."
	    try:
	        # A set removes duplicates that only appear once whitespace is stripped
	        cleaned = {str(name).strip() for name in df["Full Name"].dropna().unique()}
	        valid_names = sorted(name for name in cleaned if name and name.lower() != 'nan')
	        return [placeholder] + valid_names
	    except Exception as e:
	        # UI boundary: report the problem and keep the page usable
	        st.error(f"Error preparing user options: {e}")
	        return [placeholder]

	# Main App: page title, the three tabs, and the dataset-analysis tab.
	# NOTE(review): LLM_DATABASE is not defined or imported in the visible part
	# of this file — confirm where it is expected to come from.
	lang = st.session_state.language

	st.title(get_text('title', lang))
	tab1, tab2, tab3 = st.tabs([
	    f"📊 {get_text('dataset_analysis', lang)}",
	    f"⚙️ {get_text('manual_spec_entry', lang)}",
	    f"🧠 {get_text('training_estimator', lang)}"
	])

	with tab1:
	    st.markdown("Get personalized recommendations from 150+ popular open source AI models with download sizes!")
	    # Load data with better error handling
	    df, error = load_data()

	    if error or df is None or df.empty:
	        # Show the loader's own message when there is one; it distinguishes a
	        # missing file from an unreadable or empty one.
	        if error:
	            st.warning(f"⚠️ {error} Running with demo data for testing.")
	        else:
	            st.warning("⚠️ Excel files not found. Running with demo data for testing.")
	        st.info("📁 To use real data, place 'BITS_INTERNS.xlsx' and 'ICFAI.xlsx' in the 'src/' directory.")
	        df = generate_demo_data()

	        with st.expander("📋 Expected Data Format"):
	            st.markdown("""
	            The app expects Excel files with the following columns:
	            - Full Name: Student name
	            - Laptop RAM: RAM specification (e.g., "8GB", "16 GB", "8192MB")
	            - Mobile RAM: Mobile device RAM
	            - Laptop Operating System: OS name
	            - Mobile Operating System: Mobile OS name
	            """)

	    # Verify required columns exist
	    required_columns = ["Full Name", "Laptop RAM", "Mobile RAM"]
	    missing_columns = [col for col in required_columns if col not in df.columns]

	    if missing_columns:
	        st.error(f"Missing required columns: {missing_columns}")
	        st.info("Please ensure your Excel file contains the required columns.")
	        st.stop()

	    # Clean the dataframe (work on a copy; load_data() results are cached)
	    df = df.copy()
	    df["Full Name"] = df["Full Name"].astype(str).str.strip()

	    # Sidebar filters and info
	    with st.sidebar:
	        st.header("🔍 Filters & Info")
	        # Language selector
	        st.subheader("🌐 Language / భాష / भाषा")
	        language_codes = list(LANGUAGES.keys())
	        selected_language = st.selectbox(
	            get_text('select_language', lang),
	            options=language_codes,
	            format_func=lambda x: LANGUAGES[x],
	            index=language_codes.index(lang)
	        )

	        # Persist the choice and rerun so the whole page uses the new language
	        if selected_language != lang:
	            st.session_state.language = selected_language
	            st.rerun()

	        st.markdown("---")

	        # Performance tier filter
	        tier_names = ["Ultra Low", "Low", "Moderate-Low", "Moderate", "Good", "High", "Ultra High", "Unknown"]
	        performance_filter = st.multiselect(
	            "Filter by Performance Tier:",
	            tier_names,
	            default=list(tier_names)
	        )

	        # Model category filter
	        st.subheader("Model Categories")
	        show_categories = st.multiselect(
	            "Show specific categories:",
	            ["general", "code", "chat", "reasoning", "multimodal"],
	            default=["general", "code", "chat"]
	        )

	        st.markdown("---")
	        st.markdown("### 📊 Quick Stats")
	        st.metric("Total Students", len(df))
	        st.metric("Popular Models", "150+")

	        # Calculate average RAM
	        avg_laptop_ram = df["Laptop RAM"].apply(extract_numeric_ram).mean()
	        avg_mobile_ram = df["Mobile RAM"].apply(extract_numeric_ram).mean()

	        if not pd.isna(avg_laptop_ram):
	            st.metric("Avg Laptop RAM", f"{avg_laptop_ram:.1f} GB")
	        if not pd.isna(avg_mobile_ram):
	            st.metric("Avg Mobile RAM", f"{avg_mobile_ram:.1f} GB")

	    # Individual student analysis
	    st.subheader(f"👤 {get_text('individual_analysis', lang)}")

	    # Prepare options safely
	    user_options = prepare_user_options(df)

	    selected_user = st.selectbox(
	        get_text('choose_student', lang),
	        options=user_options,
	        index=0  # Default to first option ("Select a student...")
	    )

	    if selected_user and selected_user != "Select a student...":
	        # Find user data with safe lookup
	        user_data_mask = df["Full Name"].astype(str).str.strip() == selected_user
	        if user_data_mask.any():
	            user_data = df[user_data_mask].iloc[0]

	            # Side-by-side laptop / mobile view
	            col1, col2 = st.columns(2)

	            with col1:
	                st.markdown(f"### 💻 {get_text('laptop_config', lang)}")
	                laptop_os_icon, laptop_os_name = get_os_info(user_data.get('Laptop Operating System'))
	                laptop_ram = user_data.get('Laptop RAM', 'Not specified')
	                laptop_rec, laptop_tier, laptop_info, laptop_models = recommend_llm(laptop_ram, lang)
	                laptop_ram_gb = extract_numeric_ram(laptop_ram) or 0

	                st.markdown(f"OS: {laptop_os_icon} {laptop_os_name}")
	                st.markdown(f"RAM: {laptop_ram}")
	                st.markdown(f"{get_text('performance_tier', lang)}: {laptop_tier}")

	                st.success(f"💡 {get_text('recommendation', lang)}: {laptop_rec}")
	                st.info(f"ℹ️ {get_text('notes', lang)}: {laptop_info}")

	                # Display detailed models for laptop
	                if laptop_models:
	                    filtered_models = {k: v for k, v in laptop_models.items() if k in show_categories}
	                    display_model_categories(filtered_models, laptop_ram_gb, lang)

	            with col2:
	                st.markdown(f"### 📱 {get_text('mobile_config', lang)}")
	                mobile_os_icon, mobile_os_name = get_os_info(user_data.get('Mobile Operating System'))
	                mobile_ram = user_data.get('Mobile RAM', 'Not specified')
	                mobile_rec, mobile_tier, mobile_info, mobile_models = recommend_llm(mobile_ram, lang)
	                mobile_ram_gb = extract_numeric_ram(mobile_ram) or 0

	                st.markdown(f"OS: {mobile_os_icon} {mobile_os_name}")
	                st.markdown(f"RAM: {mobile_ram}")
	                st.markdown(f"{get_text('performance_tier', lang)}: {mobile_tier}")

	                st.success(f"💡 {get_text('recommendation', lang)}: {mobile_rec}")
	                st.info(f"ℹ️ {get_text('notes', lang)}: {mobile_info}")

	                # Display detailed models for mobile
	                if mobile_models:
	                    filtered_models = {k: v for k, v in mobile_models.items() if k in show_categories}
	                    display_model_categories(filtered_models, mobile_ram_gb, lang)

	    # Batch Analysis Section
	    st.markdown("---")
	    st.header(f"📊 {get_text('batch_analysis', lang)}")

	    # Create enhanced batch table
	    df_display = df[["Full Name", "Laptop RAM", "Mobile RAM"]].copy()

	    # Run the recommender once per cell and split the result, rather than
	    # calling it separately for the recommendation and for the tier.
	    laptop_results = [recommend_llm(value, lang) for value in df["Laptop RAM"]]
	    mobile_results = [recommend_llm(value, lang) for value in df["Mobile RAM"]]
	    laptop_recommendations = pd.Series([result[0] for result in laptop_results], index=df.index)
	    mobile_recommendations = pd.Series([result[0] for result in mobile_results], index=df.index)
	    laptop_tiers = pd.Series([result[1] for result in laptop_results], index=df.index)
	    mobile_tiers = pd.Series([result[1] for result in mobile_results], index=df.index)

	    df_display["Laptop LLM"] = laptop_recommendations
	    df_display["Mobile LLM"] = mobile_recommendations
	    df_display["Laptop Tier"] = laptop_tiers
	    df_display["Mobile Tier"] = mobile_tiers

	    # Filter based on sidebar selections: keep a row when either device's
	    # tier is selected
	    mask = (laptop_tiers.isin(performance_filter) | mobile_tiers.isin(performance_filter))
	    df_filtered = df_display[mask]

	    # Display filtered table
	    st.subheader(f"📋 {get_text('student_recommendations', lang)} ({len(df_filtered)} students)")
	    st.dataframe(
	        df_filtered,
	        use_container_width=True,
	        column_config={
	            "Full Name": st.column_config.TextColumn("Student Name", width="medium"),
	            "Laptop RAM": st.column_config.TextColumn("Laptop RAM", width="small"),
	            "Mobile RAM": st.column_config.TextColumn("Mobile RAM", width="small"),
	            "Laptop LLM": st.column_config.TextColumn("Laptop Recommendation", width="large"),
	            "Mobile LLM": st.column_config.TextColumn("Mobile Recommendation", width="large"),
	            "Laptop Tier": st.column_config.TextColumn("L-Tier", width="small"),
	            "Mobile Tier": st.column_config.TextColumn("M-Tier", width="small"),
	        }
	    )

	    # Performance distribution chart
	    if len(df) > 1:
	        st.subheader(f"📈 {get_text('ram_distribution', lang)}")
	        fig = create_performance_chart(df)
	        st.plotly_chart(fig, use_container_width=True)

	    # Performance tier summary
	    st.subheader(f"🎯 {get_text('performance_summary', lang)}")
	    tier_col1, tier_col2 = st.columns(2)

	    with tier_col1:
	        st.markdown("Laptop Performance Tiers:")
	        laptop_tier_counts = laptop_tiers.value_counts()
	        for tier, count in laptop_tier_counts.items():
	            percentage = (count / len(laptop_tiers)) * 100
	            st.write(f"• {tier}: {count} students ({percentage:.1f}%)")

	    with tier_col2:
	        st.markdown("Mobile Performance Tiers:")
	        mobile_tier_counts = mobile_tiers.value_counts()
	        for tier, count in mobile_tier_counts.items():
	            # Share of all students, so divide by the number of rows
	            # (not by the number of distinct tiers)
	            percentage = (count / len(mobile_tiers)) * 100
	            st.write(f"• {tier}: {count} students ({percentage:.1f}%)")

	    # Model Explorer Section
	    st.markdown("---")
	    st.header(f"🔍 {get_text('model_explorer', lang)}")

	    explorer_col1, explorer_col2 = st.columns(2)

	    # Map selection to database key
	    ram_mapping = {
	        "≤2GB (Ultra Low)": "ultra_low",
	        "3-4GB (Low)": "low",
	        "5-6GB (Moderate-Low)": "moderate_low",
	        "7-8GB (Moderate)": "moderate",
	        "9-16GB (Good)": "good",
	        "17-32GB (High)": "high",
	        ">32GB (Ultra High)": "ultra_high"
	    }

	    with explorer_col1:
	        selected_ram_range = st.selectbox(
	            get_text('select_ram_range', lang),
	            list(ram_mapping)
	        )

	    with explorer_col2:
	        selected_category = st.selectbox(
	            get_text('select_category', lang),
	            ["general", "code", "chat", "reasoning", "multimodal"]
	        )

	    selected_ram_key = ram_mapping[selected_ram_range]
	    if selected_ram_key in LLM_DATABASE and selected_category in LLM_DATABASE[selected_ram_key]:
	        models = LLM_DATABASE[selected_ram_key][selected_category]

	        st.subheader(f"🎯 {selected_category.title()} Models for {selected_ram_range}")

	        # Display models in a detailed table
	        for model in models:
	            with st.container():
	                col1, col2, col3 = st.columns([3, 1, 3])
	                with col1:
	                    st.markdown(f"### {model['name']}")
	                with col2:
	                    st.markdown(f"{model['size']}")
	                    st.caption("Download Size")
	                with col3:
	                    st.markdown(f"{model['description']}")
	                    # Add download suggestion
	                    if "Llama" in model['name']:
	                        st.caption("🔗 Available on Hugging Face & Ollama")
	                    elif "Mistral" in model['name']:
	                        st.caption("🔗 Available on Hugging Face & Mistral AI")
	                    elif "Gemma" in model['name']:
	                        st.caption("🔗 Available on Hugging Face & Google")
	                    else:
	                        st.caption("🔗 Available on Hugging Face")
	                st.markdown("---")
	    else:
	        st.info(f"No {selected_category} models available for {selected_ram_range}")

	    # Reference guide
	    with st.expander("📘 Model Guide & Download Information"):
	        st.markdown("""
	        ## 🚀 Popular Models by Category

	        ### 🎯 General Purpose Champions
	        - Llama-2 Series: Meta's flagship models (7B, 13B, 70B)
	        - Mistral Series: Excellent efficiency and performance
	        - Gemma: Google's efficient models (2B, 7B)
	        - Phi: Microsoft's compact powerhouses

	        ### 💻 Code Specialists
	        - CodeLlama: Meta's dedicated coding models
	        - StarCoder: BigCode's programming experts
	        - WizardCoder: Enhanced coding capabilities
	        - DeepSeek-Coder: Chinese tech giant's coder

	        ### 💬 Chat Optimized
	        - Vicuna: UC Berkeley's ChatGPT alternative
	        - Zephyr: HuggingFace's chat specialist
	        - OpenChat: High-quality conversation models
	        - Neural-Chat: Intel-optimized chat models

	        ### 🧮 Reasoning Masters
	        - WizardMath: Mathematical problem solving
	        - MetaMath: Advanced arithmetic reasoning
	        - Orca-2: Microsoft's reasoning specialist
	        - Goat: Specialized arithmetic model

	        ### 👁️ Multimodal Models
	        - LLaVA: Large Language and Vision Assistant
	        - MiniGPT-4: Multimodal conversational AI

	        ## 💾 Download Size Reference

	        | Model Size | FP16 | 8-bit | 4-bit | Use Case |
	        |------------|------|-------|-------|----------|
	        | 1-3B | 2-6GB | 1-3GB | 0.5-1.5GB | Mobile, Edge |
	        | 7B | 13GB | 7GB | 3.5GB | Desktop, Laptop |
	        | 13B | 26GB | 13GB | 7GB | Workstation |
	        | 30-34B | 60GB | 30GB | 15GB | Server, Cloud |
	        | 70B | 140GB | 70GB | 35GB | High-end Server |

	        ## 🛠️ Where to Download

	        ### Primary Sources
	        - 🤗 Hugging Face: Largest repository with 400,000+ models
	        - 🦙 Ollama: Simple CLI tool for local deployment
	        - 📦 LM Studio: User-friendly GUI for model management

	        ### Quantized Formats
	        - GGUF: Best for CPU inference (llama.cpp)
	        - GPTQ: GPU-optimized quantization
	        - AWQ: Advanced weight quantization

	        ### Download Tips
	        - Use `git lfs` for large models from Hugging Face
	        - Consider bandwidth and storage before downloading
	        - Start with 4-bit quantized versions for testing
	        - Use `ollama pull model_name` for easiest setup

	        ## 🔧 Optimization Strategies

	        ### Memory Reduction
	        - 4-bit quantization: 75% memory reduction
	        - 8-bit quantization: 50% memory reduction
	        - CPU offloading: Use system RAM for overflow

	        ### Speed Optimization
	        - GPU acceleration: CUDA, ROCm, Metal
	        - Batch processing: Process multiple requests
	        - Context caching: Reuse computations
	        """)

	    # Footer with resources
	    # NOTE(review): the original nesting is not recoverable from the flattened
	    # source; the footer is placed inside the first tab — confirm.
	    st.markdown("---")
	    st.markdown("""
	    ### 🔗 Essential Download & Deployment Tools
	    📦 Easy Model Deployment:
	    - [Ollama](https://ollama.ai/) – `curl -fsSL https://ollama.ai/install.sh | sh`
	    - [LM Studio](https://lmstudio.ai/) – Drag-and-drop GUI for running models locally
	    - [GPT4All](https://gpt4all.io/) – Cross-platform desktop app for local LLMs
	    🤗 Model Repositories:
	    - [Hugging Face Hub](https://huggingface.co/models) – Filter by model size, task, and license
	    - [TheBloke's Quantizations](https://huggingface.co/TheBloke) – Pre-quantized models in GGUF/GPTQ format
	    - [Awesome LLM](https://github.com/Hannibal046/Awesome-LLMs) – Curated list of models and resources
	    ---
	    """)

	# Manual spec entry tab: rendered entirely by run_app2()
	with tab2:
	    run_app2()


	# Training time & cost estimator tab
	with tab3:
	    st.title("🧠 LLM Training Time & Cost Estimator")

	    # Load and prepare model list
	    # NOTE(review): LLM_DATABASE is not defined or imported in the visible part
	    # of this file — confirm where it is expected to come from.
	    model_list = get_all_models_from_database(LLM_DATABASE)
	    dropdown_options = [m["display"] for m in model_list]

	    # Dropdown menu
	    selected_display = st.selectbox("Select a Model", dropdown_options)
	    selected_model = next((m for m in model_list if m["display"] == selected_display), None)

	    if selected_model is None:
	        # Empty model list: nothing can be estimated, and indexing None below
	        # would raise TypeError.
	        st.info("No models available to estimate.")
	    else:
	        # Convert download size to params in billions (very rough approx.):
	        # the size in GB is used directly as the parameter count.
	        size_match = re.search(r"(\d+(?:\.\d+)?)\s*(GB|MB)", str(selected_model["size"]))
	        if size_match is None:
	            size_val = 1.0  # default
	        elif size_match.group(2) == "GB":
	            size_val = float(size_match.group(1))
	        else:
	            size_val = float(size_match.group(1)) / 1024

	        params = size_val
	        tokens = st.number_input("Training Tokens (B)", min_value=1.0, value=300.0)

	        # Select compute method
	        gpu_choice = st.radio("Choose Compute Source", ["Manual TFLOPs", "A100", "H100", "Exo"])

	        if gpu_choice == "Manual TFLOPs":
	            teraflops = st.number_input("TFLOPs/s", min_value=1.0, value=100.0)
	            cost_per_tflop_hr = st.number_input("₹ Cost per TFLOP-Hour", min_value=0.0, value=0.0)
	        elif gpu_choice == "Exo":
	            exo_flops = st.number_input("TFLOPs from Exo", min_value=1.0)
	            teraflops = get_gpu_teraflops("Exo", exo_flops)
	            cost_per_tflop_hr = st.number_input("₹ Cost per TFLOP-Hour (Exo)", min_value=0.0, value=0.0)
	        else:
	            teraflops = get_gpu_teraflops(gpu_choice)
	            cost_str = str(selected_model.get(f"cost_{gpu_choice.lower()}", "₹0")).replace("₹", "").replace(",", "")
	            try:
	                cost_per_tflop_hr = float(cost_str) / 100  # rough est: ₹ per 100 TFLOP-hr
	            except ValueError:
	                # Cost text that is not a plain number: treat as unknown (0)
	                cost_per_tflop_hr = 0.0
	            st.info(f"{gpu_choice}: ₹{cost_per_tflop_hr:.2f} per TFLOP-Hour")

	        # Estimate
	        if st.button("Estimate Time & Cost"):
	            result = estimate_training_time_and_cost(params, tokens, teraflops, cost_per_tflop_hr)
	            st.success(f"""
	            📊 Model: {selected_model['name']}
	            🧠 Params (est): {params:.2f}B
	            🔢 FLOPs Required: {result['flops_required']:.2e}
	            ⏱️ Time: {result['time_hours']:.2f} hrs / {result['time_days']:.2f} days
	            💸 Cost: ₹{result['total_cost']:.2f}
	            ⚙️ Compute Used: {teraflops} TFLOPs/s
	            """)