Spaces:

ehsanshahid522
/

aml-project-deploy

Sleeping

File size: 18,613 Bytes

2d802f0

from flask import Flask, render_template, request
from werkzeug.utils import secure_filename
import os
import numpy as np
from PIL import Image
from transformers import pipeline
from gtts import gTTS
import speech_recognition as sr
import librosa

# Try importing tensorflow, handle if missing
try:
    from tensorflow.keras.models import load_model
except ImportError:
    load_model = None

# Flask application object. Uploaded images and audio clips are written under
# static/uploads; the folder is created at import time if it is missing.
app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = 'static/uploads'
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)

# Dataset paths (Colab / Google Drive layout - update these for local use).
# NOTE(review): neither constant is referenced by the routes visible in this
# file; confirm they are still needed before relying on them.
TRAIN_DIR = "/content/drive/MyDrive/AML-F24/Code/image_datset/image_datset/train"
TEST_DIR = "/content/drive/MyDrive/AML-F24/Code/image_datset/image_datset/test"

# ---------------- MODELS ---------------- #
# Every model handle is taken from the project-local ``models_loader`` module.
# The routes below test these handles for truthiness / ``None`` and show a
# "not available" message instead of running inference when one is missing.
from models_loader import loader

sentiment_model = loader.sentiment_pipeline
qa_model = loader.qa_pipeline
textgen_model = loader.text_gen_pipeline
translator = loader.translator_pipeline
stt_model = loader.stt_pipeline
zsl_model = loader.zsl_pipeline
gender_classifier = loader.gender_classifier
gender_model = loader.cnn_model # Custom CNN; gender() calls it with a torch tensor

# Clustering Dependencies
import pandas as pd
from sklearn.cluster import KMeans, DBSCAN
import matplotlib
matplotlib.use('Agg') # Non-interactive backend
import matplotlib.pyplot as plt
import io
import base64
from sklearn.preprocessing import StandardScaler

# Association Rules Dependencies
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

# ---------------- ROUTES ---------------- #

@app.route('/')
def index():
    """Serve the landing page of the application."""
    landing_page = render_template('index.html')
    return landing_page

# -------- GENDER CLASSIFICATION -------- #
@app.route('/gender', methods=['GET', 'POST'])
def gender():
    """Classify the gender shown in an uploaded image.

    GET renders the empty form. POST expects a multipart upload in the
    ``image`` field. The upload is stored in ``UPLOAD_FOLDER`` and classified
    with ``gender_classifier`` when that handle is truthy, otherwise with the
    custom CNN ``gender_model``.

    Returns:
        The rendered ``gender.html`` template. ``result`` holds the predicted
        label, or a human-readable message when the upload is missing, no
        model is loaded, or inference fails.
    """
    result = ""
    if request.method == 'POST':
        if 'image' not in request.files:
            return render_template('gender.html', result="No image uploaded")

        file = request.files['image']
        if not file.filename:
            return render_template('gender.html', result="No image selected")

        # secure_filename() can reduce a hostile name (e.g. "../") to an empty
        # string; joining that would point at the upload folder itself.
        filename = secure_filename(file.filename)
        if not filename:
            return render_template('gender.html', result="Invalid image filename")

        filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
        file.save(filepath)

        if gender_classifier:
            try:
                # The context manager releases the file handle after inference.
                with Image.open(filepath) as img:
                    results = gender_classifier(img)
                # Use the first entry of the classifier output as the answer.
                result = results[0]['label'].capitalize()
                print(f"Gender Classification Result: {result}")
            except Exception as e:
                result = f"Error processing image with transformers: {e}"
        elif gender_model:
            try:
                # torch is not imported at module level; without this local
                # import the CNN path always failed with a NameError.
                import torch

                with Image.open(filepath) as raw:
                    img = raw.convert('RGB').resize((128, 128))
                img_array = np.array(img).astype(np.float32) / 255.0
                # (height, width, channels) -> (batch, channels, height, width)
                img_tensor = torch.from_numpy(img_array).permute(2, 0, 1).unsqueeze(0)

                with torch.no_grad():
                    prediction = gender_model(img_tensor)
                result = "Male" if prediction.item() > 0.5 else "Female"
            except Exception as e:
                import traceback
                print(traceback.format_exc())
                result = f"Error processing image: {e}"
        else:
            result = "Gender model is not loaded (check console for details)."

    return render_template('gender.html', result=result)

# -------- TEXT GENERATION -------- #
@app.route('/textgen', methods=['GET', 'POST'])
def textgen():
    """Generate a text continuation for the submitted prompt.

    GET renders the empty form. POST reads the ``prompt`` form field and, when
    ``textgen_model`` is loaded, asks it for a continuation with
    ``max_length=50``.

    Returns:
        The rendered ``textgen.html`` template. ``generated_text`` holds the
        model output, or a message when the model is missing or fails.
    """
    result = ""
    if request.method == 'POST':
        # .get() matches the other routes: a missing field becomes an empty
        # prompt instead of an automatic 400 response.
        text = request.form.get('prompt', '')
        if textgen_model:
            try:
                result = textgen_model(text, max_length=50)[0]['generated_text']
            except Exception as e:
                # Report inference failures on the page, like the other routes.
                result = f"Text generation failed: {e}"
        else:
            result = "Text generation model not available"
    return render_template('textgen.html', generated_text=result)

# -------- TRANSLATION -------- #
@app.route('/translate', methods=['GET', 'POST'])
def translate():
    """Translate the submitted ``text`` form field with ``translator``.

    A GET request renders the form with an empty translation. On POST the
    template receives either the pipeline's ``translation_text`` or a notice
    that the translation model is not available.
    """
    if request.method != 'POST':
        return render_template('translate.html', translated_text="")

    source_text = request.form.get('text', '')
    if not translator:
        translated = "Translation model not available"
    else:
        outputs = translator(source_text)
        translated = outputs[0]['translation_text']
    return render_template('translate.html', translated_text=translated)

# -------- SENTIMENT (VOICE) -------- #
@app.route('/sentiment', methods=['GET', 'POST'])
def sentiment():
    """Run sentiment analysis on typed text or on a transcribed voice clip.

    GET renders the empty form. On POST the typed ``text`` field wins when it
    is non-empty; otherwise the ``voice`` upload is saved, loaded with librosa
    at ``sr=16000`` and transcribed with ``stt_model``. The resulting text is
    scored by ``sentiment_model`` and tagged as a question or a statement.

    Returns:
        The rendered ``sentiment.html`` template. ``result`` holds the summary
        line, or a message describing why no analysis was produced.
    """
    result = ""
    if request.method == 'POST':
        typed_text = request.form.get('text', '').strip()
        audio_file = request.files.get('voice')

        text = ""
        if typed_text:
            text = typed_text
        elif audio_file:
            if audio_file.filename == '':
                return render_template('sentiment.html', result="No audio selected")

            # Check the model first so no file is written that cannot be used.
            if stt_model is None:
                return render_template('sentiment.html', result="STT model not available")

            # secure_filename() can reduce a hostile name to an empty string;
            # joining that would point at the upload folder itself.
            audio_filename = secure_filename(audio_file.filename)
            if not audio_filename:
                return render_template('sentiment.html', result="Invalid audio filename")
            audio_path = os.path.join(app.config['UPLOAD_FOLDER'], audio_filename)

            try:
                # Saving is inside the try so disk errors are reported on the
                # page rather than surfacing as an unhandled exception.
                audio_file.save(audio_path)
                audio_array, _ = librosa.load(audio_path, sr=16000)
                # Hand the STT pipeline a float32 array.
                audio_array = audio_array.astype(np.float32)
                stt_result = stt_model(audio_array)
                text = stt_result.get('text', '').strip()
            except Exception as e:
                return render_template('sentiment.html', result=f"STT processing error: {str(e)}")

            if not text:
                return render_template('sentiment.html', result="Could not understand audio")
        else:
            return render_template('sentiment.html', result="No input provided")

        # Sentiment Analysis Logic
        if sentiment_model is None:
            result = f"Analyzed: {text} | Status: Sentiment model not available"
        else:
            try:
                sentiment_data = sentiment_model(text)[0]
                label = sentiment_data.get('label', 'Unknown').capitalize()
                score = round(sentiment_data.get('score', 0) * 100, 1)

                # Question detection: a trailing "?" or a leading question word
                # followed by a space. startswith() takes the tuple directly.
                question_prefixes = tuple(
                    f"{word} " for word in (
                        "who", "what", "where", "when", "why", "how", "is",
                        "are", "do", "does", "can", "could", "would", "should",
                    )
                )
                is_question = (
                    text.strip().endswith("?")
                    or text.lower().startswith(question_prefixes)
                )

                type_str = "Question" if is_question else "Statement"
                result = f"Text: \"{text}\" | Type: {type_str} | Sentiment: {label} (Confidence: {score}%)"
            except Exception as e:
                result = f"Sentiment analysis failed: {str(e)}"

    return render_template('sentiment.html', result=result)

# -------- QUESTION ANSWERING (VOICE → VOICE) -------- #
@app.route('/qa', methods=['GET', 'POST'])
def qa():
    """Answer a typed or spoken question about a user-supplied context.

    GET renders the empty form. On POST the typed ``question`` field wins when
    it is non-empty; otherwise the ``voice`` upload is saved, loaded with
    librosa at ``sr=16000`` and transcribed with ``stt_model``. When both a
    question and a context are present, ``qa_model`` produces the answer and
    gTTS writes a spoken version to ``static/answer.mp3`` (best effort).

    Returns:
        The rendered ``qa.html`` template with ``answer``, ``context`` and
        ``question``; ``answer`` holds a message when something is missing or
        a step fails.
    """
    answer = ""
    context = ""
    question_text = ""
    if request.method == 'POST':
        context = request.form.get('context', '')
        audio_file = request.files.get('voice')
        typed_question = request.form.get('question', '').strip()

        if typed_question:
            question_text = typed_question
        elif audio_file and audio_file.filename:
            audio_filename = secure_filename(audio_file.filename)
            if stt_model is None:
                # Same message as the sentiment route, instead of a generic
                # error caused by calling None.
                answer = "STT model not available"
            elif not audio_filename:
                # secure_filename() can reduce a hostile name to an empty
                # string; joining that would point at the upload folder.
                answer = "Invalid audio filename"
            else:
                audio_path = os.path.join(app.config['UPLOAD_FOLDER'], audio_filename)
                try:
                    # Saving is inside the try so disk errors are reported on
                    # the page rather than surfacing as an unhandled exception.
                    audio_file.save(audio_path)
                    audio_array, _ = librosa.load(audio_path, sr=16000)
                    audio_array = audio_array.astype(np.float32)
                    stt_result = stt_model(audio_array)
                    question_text = stt_result.get('text', '').strip()
                except Exception as e:
                    print(f"STT Error in QA: {e}")
                    answer = "Error processing your voice question."

        if not answer and question_text and context:
            if qa_model is None:
                answer = "Question-answering model not available"
            else:
                try:
                    result = qa_model(question=question_text, context=context)
                    answer = result.get('answer', str(result))

                    # Speech output is best effort: a TTS failure must not
                    # hide the text answer.
                    # NOTE(review): every request writes the same file, so
                    # concurrent users may overwrite each other's audio.
                    try:
                        tts = gTTS(answer)
                        tts.save(os.path.join('static', 'answer.mp3'))
                    except Exception as e:
                        print(f"TTS failed: {e}")
                except Exception as e:
                    answer = f"QA model error: {e}"
        elif not answer:
            answer = "Please provide both context and a question (typed or voice)."

    return render_template('qa.html', answer=answer, context=context, question=question_text)

# -------- ZERO-SHOT LEARNING -------- #
@app.route('/zsl', methods=['GET', 'POST'])
def zsl():
    """Classify text against user-supplied labels with the zero-shot model.

    GET renders the empty form. POST reads ``text`` and a comma-separated
    ``labels`` field, runs ``zsl_model`` and reports the first (top) label
    together with the score of every candidate.

    Returns:
        The rendered ``zsl.html`` template. On success ``result`` is a dict
        with ``label``, ``score`` (percent) and ``all_results``, a list of
        ``(label, percent)`` pairs. On failure ``error`` holds the message.
    """
    result = None
    if request.method == 'POST':
        text = request.form.get('text', '')
        labels = request.form.get('labels', '')

        if not text or not labels:
            return render_template('zsl.html', error="Both text and labels are required.")

        candidate_labels = [l.strip() for l in labels.split(',') if l.strip()]
        # A value such as " , " passes the check above but yields no labels.
        if not candidate_labels:
            return render_template('zsl.html', error="Please provide at least one non-empty label.")

        if zsl_model is None:
            return render_template('zsl.html', error="ZSL model not available.")

        try:
            output = zsl_model(text, candidate_labels=candidate_labels)
            percentages = [round(s * 100, 2) for s in output['scores']]
            result = {
                'label': output['labels'][0], # BART-MNLI returns sorted
                'score': percentages[0],
                # A list, not a bare zip(): the template can iterate it more
                # than once without finding it exhausted.
                'all_results': list(zip(output['labels'], percentages)),
            }
        except Exception as e:
            return render_template('zsl.html', error=f"ZSL error: {str(e)}")

    return render_template('zsl.html', result=result)


# -------- K-MEANS CLUSTERING -------- #
@app.route('/clustering', methods=['GET', 'POST'])
def clustering():
    """Cluster the numeric columns of an uploaded table with K-Means.

    GET renders the empty form. POST expects a CSV or Excel upload in the
    ``file`` field and an optional integer ``clusters`` field (default 3).
    Rows with missing numeric values are dropped, K-Means is fitted on the
    remaining rows, and the first two numeric columns are plotted coloured by
    cluster.

    Returns:
        The rendered ``clustering.html`` template. On success ``plot_url`` is
        a base64-encoded PNG and ``cluster_info`` maps each cluster label to
        its row count. On failure ``error`` holds the message.
    """
    plot_url = None
    cluster_info = None
    if request.method == 'POST':
        if 'file' not in request.files:
            return render_template('clustering.html', error="No file uploaded")

        file = request.files['file']
        if file.filename == '':
            return render_template('clustering.html', error="No file selected")

        # Validate the form value here so bad input gives a message, not a 500.
        try:
            n_clusters = int(request.form.get('clusters', 3))
        except (TypeError, ValueError):
            return render_template('clustering.html', error="Number of clusters must be an integer.")
        if n_clusters < 1:
            return render_template('clustering.html', error="Number of clusters must be at least 1.")

        try:
            # Case-insensitive so "DATA.CSV" is not handed to the Excel reader.
            if file.filename.lower().endswith('.csv'):
                df = pd.read_csv(file)
            else:
                df = pd.read_excel(file)

            # Keep only numeric columns
            numeric_df = df.select_dtypes(include=[np.number])

            if numeric_df.shape[1] < 2:
                return render_template('clustering.html', error="Dataset must have at least 2 numeric columns for clustering.")

            # Basic cleaning
            numeric_df = numeric_df.dropna()
            if len(numeric_df) < n_clusters:
                return render_template(
                    'clustering.html',
                    error=f"Only {len(numeric_df)} complete rows available; need at least {n_clusters} for K={n_clusters}.",
                )

            # K-Means
            kmeans = KMeans(n_clusters=n_clusters, random_state=42)
            labels = kmeans.fit_predict(numeric_df)

            # Attach labels to the cleaned rows only. Assigning them to the
            # original frame fails with a length mismatch whenever dropna()
            # removed a row.
            clustered = numeric_df.copy()
            clustered['Cluster'] = labels

            # Create Plot (using first two numeric columns). The explicit
            # figure is closed in ``finally`` so a plotting error cannot leak it.
            fig, ax = plt.subplots(figsize=(10, 6))
            try:
                scatter = ax.scatter(numeric_df.iloc[:, 0], numeric_df.iloc[:, 1], c=labels, cmap='viridis', alpha=0.6)
                fig.colorbar(scatter, ax=ax, label='Cluster')
                ax.set_title(f'K-Means Clustering (K={n_clusters})')
                ax.set_xlabel(numeric_df.columns[0])
                ax.set_ylabel(numeric_df.columns[1])
                ax.grid(True, alpha=0.3)

                # Save plot to base64
                img = io.BytesIO()
                fig.savefig(img, format='png', bbox_inches='tight', transparent=True)
            finally:
                plt.close(fig)
            plot_url = base64.b64encode(img.getvalue()).decode()

            # Cluster stats
            cluster_info = clustered.groupby('Cluster').size().to_dict()

        except Exception as e:
            return render_template('clustering.html', error=f"Clustering error: {str(e)}")

    return render_template('clustering.html', plot_url=plot_url, cluster_info=cluster_info)

# -------- DBSCAN CLUSTERING -------- #
@app.route('/dbscan', methods=['GET', 'POST'])
def dbscan():
    """Cluster the numeric columns of an uploaded table with DBSCAN.

    GET renders the empty form. POST expects a CSV or Excel upload in the
    ``file`` field plus optional ``eps`` (float, default 0.5) and
    ``min_samples`` (int, default 5) fields. Rows with missing numeric values
    are dropped, the rest are standardised and clustered, and the first two
    numeric columns are plotted coloured by cluster.

    Returns:
        The rendered ``dbscan.html`` template. On success ``plot_url`` is a
        base64-encoded PNG and ``cluster_info`` maps each cluster label to its
        row count. On failure ``error`` holds the message.
    """
    plot_url = None
    cluster_info = None
    if request.method == 'POST':
        if 'file' not in request.files:
            return render_template('dbscan.html', error="No file uploaded")

        file = request.files['file']
        if file.filename == '':
            return render_template('dbscan.html', error="No file selected")

        # Validate the form values here so bad input gives a message, not a 500.
        try:
            eps = float(request.form.get('eps', 0.5))
            min_samples = int(request.form.get('min_samples', 5))
        except (TypeError, ValueError):
            return render_template('dbscan.html', error="eps must be a number and min_samples must be an integer.")

        try:
            # Case-insensitive so "DATA.CSV" is not handed to the Excel reader.
            if file.filename.lower().endswith('.csv'):
                df = pd.read_csv(file)
            else:
                df = pd.read_excel(file)

            # Keep only numeric columns
            numeric_df = df.select_dtypes(include=[np.number])

            if numeric_df.shape[1] < 2:
                return render_template('dbscan.html', error="Dataset must have at least 2 numeric columns for clustering.")

            # Basic cleaning
            numeric_df = numeric_df.dropna()
            if numeric_df.empty:
                return render_template('dbscan.html', error="No complete numeric rows left after removing missing values.")

            # DBSCAN with Scaling
            scaler = StandardScaler()
            scaled_data = scaler.fit_transform(numeric_df)

            dbscan_model = DBSCAN(eps=eps, min_samples=min_samples)
            labels = dbscan_model.fit_predict(scaled_data)

            # Attach labels to the cleaned rows only. Assigning them to the
            # original frame fails with a length mismatch whenever dropna()
            # removed a row.
            clustered = numeric_df.copy()
            clustered['Cluster'] = labels

            # Create Plot. The explicit figure is closed in ``finally`` so a
            # plotting error cannot leak it.
            fig, ax = plt.subplots(figsize=(10, 6))
            try:
                scatter = ax.scatter(numeric_df.iloc[:, 0], numeric_df.iloc[:, 1], c=labels, cmap='viridis', alpha=0.6)
                fig.colorbar(scatter, ax=ax, label='Cluster')
                ax.set_title(f'DBSCAN Clustering (eps={eps}, min_samples={min_samples}) - Scaled')
                ax.set_xlabel(numeric_df.columns[0])
                ax.set_ylabel(numeric_df.columns[1])
                ax.grid(True, alpha=0.3)

                # Save plot to base64
                img = io.BytesIO()
                fig.savefig(img, format='png', bbox_inches='tight', transparent=True)
            finally:
                plt.close(fig)
            plot_url = base64.b64encode(img.getvalue()).decode()

            # Cluster stats
            cluster_info = clustered.groupby('Cluster').size().to_dict()

        except Exception as e:
            return render_template('dbscan.html', error=f"DBSCAN error: {str(e)}")

    return render_template('dbscan.html', plot_url=plot_url, cluster_info=cluster_info)

# -------- A-PRIORI ASSOCIATION RULES -------- #
@app.route('/apriori', methods=['GET', 'POST'])
def apriori_route():
    """Mine association rules from an uploaded transaction table.

    GET renders the empty form. POST expects a CSV or Excel upload in the
    ``file`` field where every row is one transaction, plus optional
    ``min_support`` (default 0.1), ``min_threshold`` (default 0.7), ``metric``
    (default ``lift``) and ``has_header`` (checkbox) fields.

    Returns:
        The rendered ``apriori.html`` template. On success ``rules`` is a list
        of dicts with ``antecedents``, ``consequents``, ``support``,
        ``confidence`` and ``lift``. On failure ``error`` holds the message.
    """
    rules_html = None
    if request.method == 'POST':
        if 'file' not in request.files:
            return render_template('apriori.html', error="No file uploaded")

        file = request.files['file']
        if file.filename == '':
            return render_template('apriori.html', error="No file selected")

        # Validate the form values here so bad input gives a message, not a 500.
        try:
            min_support = float(request.form.get('min_support', 0.1))
            min_threshold = float(request.form.get('min_threshold', 0.7))
        except (TypeError, ValueError):
            return render_template('apriori.html', error="Min support and min threshold must be numbers.")
        metric = request.form.get('metric', 'lift')
        has_header = request.form.get('has_header') == 'on'

        try:
            header = 0 if has_header else None
            # Case-insensitive so "DATA.CSV" is not handed to the Excel reader.
            if file.filename.lower().endswith('.csv'):
                df = pd.read_csv(file, header=header)
            else:
                df = pd.read_excel(file, header=header)

            # Convert to list of lists (transactions). Each row becomes the
            # sorted set of its non-null, non-blank cells, rendered as strings.
            transactions = []
            for row in df.values.tolist():
                items = sorted({
                    str(item).strip()
                    for item in row
                    if pd.notna(item) and str(item).strip() != ''
                })
                if items:
                    transactions.append(items)

            if not transactions:
                return render_template('apriori.html', error="No valid transactions found in file.")

            # Transaction Encoding
            te = TransactionEncoder()
            te_ary = te.fit(transactions).transform(transactions)
            encoded_df = pd.DataFrame(te_ary, columns=te.columns_)

            # Generate Frequent Itemsets
            frequent_itemsets = apriori(encoded_df, min_support=min_support, use_colnames=True)

            if frequent_itemsets.empty:
                return render_template('apriori.html', error="No frequent itemsets found. Try lowering min support.")

            # Generate Rules
            rules = association_rules(frequent_itemsets, metric=metric, min_threshold=min_threshold)

            if rules.empty:
                return render_template('apriori.html', error=f"No rules found for {metric} >= {min_threshold}. Try lowering threshold.")

            # Format rules for display: frozensets become sorted lists so the
            # item order shown to the user is deterministic.
            rules['antecedents'] = rules['antecedents'].apply(sorted)
            rules['consequents'] = rules['consequents'].apply(sorted)

            # Selection of columns for display
            display_rules = rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']]
            rules_html = display_rules.to_dict(orient='records')

        except Exception as e:
            import traceback
            print(traceback.format_exc())
            return render_template('apriori.html', error=f"A-priori error: {str(e)}")

    return render_template('apriori.html', rules=rules_html)

# Start Flask's built-in server only when this file is executed directly.
if __name__ == '__main__':
    # The auto-reloader is switched off (use_reloader=False).
    # NOTE(review): debug=True turns on Flask's debug mode, whose interactive
    # debugger should not be reachable in a public deployment - confirm this
    # entry point is only used for local development.
    app.run(debug=True, use_reloader=False)