Spaces:
Build error
Robert Yaw Agyekum Addo committed on
Commit · 0ee82c6
Parent(s): a219526
Modified app.py with ablation experiments
app.py
CHANGED
@@ -27,6 +27,11 @@ from sentence_transformers import SentenceTransformer
|
| 27 |
# context_precision
|
| 28 |
#)
|
| 29 |
from datasets import Dataset
|
| 30 |
+
import pandas as pd
|
| 31 |
+
import random
|
| 32 |
+
import plotly.express as px
|
| 33 |
+
import plotly.graph_objects as go
|
| 34 |
+
from plotly.subplots import make_subplots
|
| 35 |
|
| 36 |
from typing import List, Dict
|
| 37 |
import asyncio
|
@@ -76,6 +81,468 @@ class SentenceTransformerEmbeddings(BaseRagasEmbeddings):
| 81 |
loop = asyncio.get_event_loop()
|
| 82 |
return await loop.run_in_executor(None, self.embed_documents, texts)
|
| 83 |
|
| 84 |
+
class RAGSystemVariants:
|
| 85 |
+
def __init__(self):
|
| 86 |
+
self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
|
| 87 |
+
|
| 88 |
+
async def baseline_rag(self, query, top_k=3):
|
| 89 |
+
"""Your current full RAG system"""
|
| 90 |
+
chunks = retrieve_relevant_documents(query, [], top_k)
|
| 91 |
+
context = "\n".join([chunk["text"] for chunk in chunks])
|
| 92 |
+
response = await generate_groq_response(f"Context: {context}\n\nQuestion: {query}")
|
| 93 |
+
return response, context
|
| 94 |
+
|
| 95 |
+
async def no_retrieval(self, query):
|
| 96 |
+
"""Generation only - no retrieval"""
|
| 97 |
+
response = await generate_groq_response(query)
|
| 98 |
+
return response, ""
|
| 99 |
+
|
| 100 |
+
async def random_retrieval(self, query, top_k=3):
|
| 101 |
+
"""Random document selection instead of semantic retrieval"""
|
| 102 |
+
try:
|
| 103 |
+
all_docs = client.scroll(collection_name=collection_name, limit=100)[0]
|
| 104 |
+
if len(all_docs) > 0:
|
| 105 |
+
random_chunks = random.sample(all_docs, min(top_k, len(all_docs)))
|
| 106 |
+
context = "\n".join([chunk.payload["text"] for chunk in random_chunks])
|
| 107 |
+
else:
|
| 108 |
+
context = ""
|
| 109 |
+
response = await generate_groq_response(f"Context: {context}\n\nQuestion: {query}")
|
| 110 |
+
return response, context
|
| 111 |
+
except Exception as e:
|
| 112 |
+
st.error(f"Error in random retrieval: {e}")
|
| 113 |
+
return "Error in random retrieval", ""
|
| 114 |
+
|
| 115 |
+
async def different_top_k(self, query, top_k):
|
| 116 |
+
"""Test different top-k values"""
|
| 117 |
+
chunks = retrieve_relevant_documents(query, [], top_k)
|
| 118 |
+
context = "\n".join([chunk["text"] for chunk in chunks])
|
| 119 |
+
response = await generate_groq_response(f"Context: {context}\n\nQuestion: {query}")
|
| 120 |
+
return response, context
|
| 121 |
+
|
| 122 |
+
def create_test_dataset(limit=20):
|
| 123 |
+
"""Create a test dataset for RAGAS evaluation"""
|
| 124 |
+
test_cases = []
|
| 125 |
+
|
| 126 |
+
try:
|
| 127 |
+
conn = sqlite3.connect('./db/disease_knowledge_base.db')
|
| 128 |
+
c = conn.cursor()
|
| 129 |
+
c.execute("SELECT name, cause, symptoms, treatment FROM diseases LIMIT ?", (limit,))
|
| 130 |
+
diseases = c.fetchall()
|
| 131 |
+
conn.close()
|
| 132 |
+
|
| 133 |
+
for disease_name, cause, symptoms, treatment in diseases:
|
| 134 |
+
questions_and_answers = [
|
| 135 |
+
(f"What causes {disease_name}?", cause),
|
| 136 |
+
(f"What are the symptoms of {disease_name}?", symptoms),
|
| 137 |
+
(f"How do I treat {disease_name}?", treatment),
|
| 138 |
+
(f"Tell me about {disease_name}", f"Cause: {cause}\nSymptoms: {symptoms}\nTreatment: {treatment}"),
|
| 139 |
+
]
|
| 140 |
+
|
| 141 |
+
for question, ground_truth in questions_and_answers:
|
| 142 |
+
test_cases.append({
|
| 143 |
+
"question": question,
|
| 144 |
+
"ground_truth": ground_truth,
|
| 145 |
+
"disease": disease_name
|
| 146 |
+
})
|
| 147 |
+
|
| 148 |
+
return test_cases[:limit]
|
| 149 |
+
|
| 150 |
+
except Exception as e:
|
| 151 |
+
st.error(f"Error creating test dataset: {e}")
|
| 152 |
+
return []
|
| 153 |
+
|
| 154 |
+
async def run_ablation_study(progress_bar, status_text, max_questions=20):
|
| 155 |
+
"""Run comprehensive ablation study with progress tracking"""
|
| 156 |
+
|
| 157 |
+
status_text.text("Creating test dataset...")
|
| 158 |
+
test_cases = create_test_dataset(limit=max_questions)
|
| 159 |
+
|
| 160 |
+
if not test_cases:
|
| 161 |
+
st.error("No test cases created. Check your database connection.")
|
| 162 |
+
return None
|
| 163 |
+
|
| 164 |
+
rag_variants = RAGSystemVariants()
|
| 165 |
+
|
| 166 |
+
experiments = {
|
| 167 |
+
"Full_RAG_k3": lambda q: rag_variants.baseline_rag(q, top_k=3),
|
| 168 |
+
"No_Retrieval": lambda q: rag_variants.no_retrieval(q),
|
| 169 |
+
"Random_Retrieval": lambda q: rag_variants.random_retrieval(q, top_k=3),
|
| 170 |
+
"RAG_k1": lambda q: rag_variants.different_top_k(q, top_k=1),
|
| 171 |
+
"RAG_k5": lambda q: rag_variants.different_top_k(q, top_k=5),
|
| 172 |
+
"RAG_k10": lambda q: rag_variants.different_top_k(q, top_k=10),
|
| 173 |
+
}
|
| 174 |
+
|
| 175 |
+
all_results = []
|
| 176 |
+
total_experiments = len(experiments) * len(test_cases)
|
| 177 |
+
current_progress = 0
|
| 178 |
+
|
| 179 |
+
for exp_name, exp_func in experiments.items():
|
| 180 |
+
status_text.text(f"Running experiment: {exp_name}")
|
| 181 |
+
|
| 182 |
+
questions = []
|
| 183 |
+
answers = []
|
| 184 |
+
contexts = []
|
| 185 |
+
ground_truths = []
|
| 186 |
+
|
| 187 |
+
for test_case in test_cases:
|
| 188 |
+
try:
|
| 189 |
+
answer, context = await exp_func(test_case["question"])
|
| 190 |
+
|
| 191 |
+
questions.append(test_case["question"])
|
| 192 |
+
answers.append(answer)
|
| 193 |
+
contexts.append([context] if context else [""])
|
| 194 |
+
ground_truths.append(test_case["ground_truth"])
|
| 195 |
+
|
| 196 |
+
current_progress += 1
|
| 197 |
+
progress_bar.progress(current_progress / total_experiments)
|
| 198 |
+
|
| 199 |
+
except Exception as e:
|
| 200 |
+
st.error(f"Error in {exp_name}: {e}")
|
| 201 |
+
continue
|
| 202 |
+
|
| 203 |
+
exp_results = []
|
| 204 |
+
evaluator = LocalMetricsEvaluator()
|
| 205 |
+
|
| 206 |
+
for q, a, c, gt in zip(questions, answers, contexts, ground_truths):
|
| 207 |
+
context_str = c[0] if c and c[0] else ""
|
| 208 |
+
metrics = {
|
| 209 |
+
"question": q,
|
| 210 |
+
"answer": a,
|
| 211 |
+
"context": context_str,
|
| 212 |
+
"ground_truth": gt,
|
| 213 |
+
"experiment": exp_name,
|
| 214 |
+
"answer_relevancy": evaluator.evaluate_answer_relevancy(q, a),
|
| 215 |
+
"faithfulness": evaluator.evaluate_faithfulness(a, context_str) if context_str else 1.0,
|
| 216 |
+
"answer_correctness": evaluator.evaluate_answer_correctness(a, gt),
|
| 217 |
+
"context_precision": evaluator.evaluate_context_precision(q, context_str) if context_str else 0.0,
|
| 218 |
+
"context_recall": evaluator.evaluate_context_recall(q, context_str, gt) if context_str else 0.0
|
| 219 |
+
}
|
| 220 |
+
exp_results.append(metrics)
|
| 221 |
+
|
| 222 |
+
all_results.extend(exp_results)
|
| 223 |
+
|
| 224 |
+
return pd.DataFrame(all_results)
|
| 225 |
+
|
| 226 |
+
def visualize_ablation_results(results_df):
|
| 227 |
+
"""Create interactive visualizations for ablation study results"""
|
| 228 |
+
|
| 229 |
+
summary_stats = results_df.groupby('experiment').agg({
|
| 230 |
+
'answer_relevancy': ['mean', 'std'],
|
| 231 |
+
'faithfulness': ['mean', 'std'],
|
| 232 |
+
'answer_correctness': ['mean', 'std'],
|
| 233 |
+
'context_precision': ['mean', 'std'],
|
| 234 |
+
'context_recall': ['mean', 'std']
|
| 235 |
+
}).round(3)
|
| 236 |
+
|
| 237 |
+
summary_stats.columns = ['_'.join(col).strip() for col in summary_stats.columns.values]
|
| 238 |
+
summary_stats = summary_stats.reset_index()
|
| 239 |
+
|
| 240 |
+
metrics = ['answer_relevancy_mean', 'faithfulness_mean', 'answer_correctness_mean',
|
| 241 |
+
'context_precision_mean', 'context_recall_mean']
|
| 242 |
+
|
| 243 |
+
# Radar chart
|
| 244 |
+
fig_radar = go.Figure()
|
| 245 |
+
|
| 246 |
+
for _, row in summary_stats.iterrows():
|
| 247 |
+
fig_radar.add_trace(go.Scatterpolar(
|
| 248 |
+
r=[row[metric] for metric in metrics],
|
| 249 |
+
theta=[metric.replace('_mean', '').replace('_', ' ').title() for metric in metrics],
|
| 250 |
+
fill='toself',
|
| 251 |
+
name=row['experiment']
|
| 252 |
+
))
|
| 253 |
+
|
| 254 |
+
fig_radar.update_layout(
|
| 255 |
+
polar=dict(
|
| 256 |
+
radialaxis=dict(
|
| 257 |
+
visible=True,
|
| 258 |
+
range=[0, 1]
|
| 259 |
+
)),
|
| 260 |
+
showlegend=True,
|
| 261 |
+
title="RAGAS Metrics Comparison Across Experiments"
|
| 262 |
+
)
|
| 263 |
+
|
| 264 |
+
# Bar chart comparison
|
| 265 |
+
fig_bar = make_subplots(
|
| 266 |
+
rows=2, cols=3,
|
| 267 |
+
subplot_titles=[metric.replace('_mean', '').replace('_', ' ').title() for metric in metrics],
|
| 268 |
+
)
|
| 269 |
+
|
| 270 |
+
for i, metric in enumerate(metrics):
|
| 271 |
+
row = (i // 3) + 1
|
| 272 |
+
col = (i % 3) + 1
|
| 273 |
+
|
| 274 |
+
fig_bar.add_trace(
|
| 275 |
+
go.Bar(
|
| 276 |
+
x=summary_stats['experiment'],
|
| 277 |
+
y=summary_stats[metric],
|
| 278 |
+
error_y=dict(type='data', array=summary_stats[metric.replace('mean', 'std')]),
|
| 279 |
+
name=metric.replace('_mean', '').replace('_', ' ').title(),
|
| 280 |
+
showlegend=False
|
| 281 |
+
),
|
| 282 |
+
row=row, col=col
|
| 283 |
+
)
|
| 284 |
+
|
| 285 |
+
fig_bar.update_layout(height=800, title="Detailed Metrics Comparison")
|
| 286 |
+
|
| 287 |
+
return fig_radar, fig_bar, summary_stats
|
| 288 |
+
|
| 289 |
+
def render_research_page():
|
| 290 |
+
"""Render the research/ablation study page"""
|
| 291 |
+
|
| 292 |
+
st.title("🔬 RAG System Research Dashboard")
|
| 293 |
+
st.markdown("Systematic evaluation and ablation study of the crop disease detection RAG system")
|
| 294 |
+
|
| 295 |
+
# Initialize session state for results
|
| 296 |
+
if 'ablation_results' not in st.session_state:
|
| 297 |
+
st.session_state['ablation_results'] = None
|
| 298 |
+
|
| 299 |
+
tabs = st.tabs(["Ablation Study", "Model Comparison", "Error Analysis", "Export Results"])
|
| 300 |
+
|
| 301 |
+
with tabs[0]:
|
| 302 |
+
st.header("🧪 Ablation Study")
|
| 303 |
+
st.write("This systematically evaluates different components of the RAG system.")
|
| 304 |
+
|
| 305 |
+
col1, col2 = st.columns(2)
|
| 306 |
+
with col1:
|
| 307 |
+
max_questions = st.number_input("Number of test questions per experiment",
|
| 308 |
+
min_value=5, max_value=50, value=20)
|
| 309 |
+
with col2:
|
| 310 |
+
selected_model_research = st.selectbox(
|
| 311 |
+
"Select Model for Experiments",
|
| 312 |
+
list(SUPPORTED_MODELS.keys()),
|
| 313 |
+
key="research_model_select"
|
| 314 |
+
)
|
| 315 |
+
|
| 316 |
+
if st.button("🚀 Start Ablation Study", type="primary"):
|
| 317 |
+
progress_bar = st.progress(0)
|
| 318 |
+
status_text = st.empty()
|
| 319 |
+
|
| 320 |
+
with st.spinner("Running ablation study..."):
|
| 321 |
+
try:
|
| 322 |
+
results_df = asyncio.run(run_ablation_study(progress_bar, status_text, max_questions))
|
| 323 |
+
|
| 324 |
+
if results_df is not None:
|
| 325 |
+
st.session_state['ablation_results'] = results_df
|
| 326 |
+
|
| 327 |
+
st.success("✅ Ablation study completed!")
|
| 328 |
+
|
| 329 |
+
# Show summary statistics
|
| 330 |
+
st.subheader("📊 Summary Statistics")
|
| 331 |
+
summary_stats = results_df.groupby('experiment').agg({
|
| 332 |
+
'answer_relevancy': 'mean',
|
| 333 |
+
'faithfulness': 'mean',
|
| 334 |
+
'answer_correctness': 'mean',
|
| 335 |
+
'context_precision': 'mean',
|
| 336 |
+
'context_recall': 'mean'
|
| 337 |
+
}).round(3)
|
| 338 |
+
st.dataframe(summary_stats, use_container_width=True)
|
| 339 |
+
|
| 340 |
+
# Key insights
|
| 341 |
+
best_overall = summary_stats.mean(axis=1).idxmax()
|
| 342 |
+
st.success(f"🏆 **Best Overall Configuration:** {best_overall}")
|
| 343 |
+
|
| 344 |
+
col1, col2, col3 = st.columns(3)
|
| 345 |
+
with col1:
|
| 346 |
+
best_relevancy = summary_stats['answer_relevancy'].idxmax()
|
| 347 |
+
st.metric("Best Answer Relevancy", best_relevancy,
|
| 348 |
+
f"{summary_stats.loc[best_relevancy, 'answer_relevancy']:.3f}")
|
| 349 |
+
with col2:
|
| 350 |
+
best_faithfulness = summary_stats['faithfulness'].idxmax()
|
| 351 |
+
st.metric("Best Faithfulness", best_faithfulness,
|
| 352 |
+
f"{summary_stats.loc[best_faithfulness, 'faithfulness']:.3f}")
|
| 353 |
+
with col3:
|
| 354 |
+
best_correctness = summary_stats['answer_correctness'].idxmax()
|
| 355 |
+
st.metric("Best Correctness", best_correctness,
|
| 356 |
+
f"{summary_stats.loc[best_correctness, 'answer_correctness']:.3f}")
|
| 357 |
+
|
| 358 |
+
# Create and display visualizations
|
| 359 |
+
fig_radar, fig_bar, summary_stats_detailed = visualize_ablation_results(results_df)
|
| 360 |
+
|
| 361 |
+
st.subheader("📈 Results Visualization")
|
| 362 |
+
viz_tab1, viz_tab2, viz_tab3 = st.tabs(["Radar Chart", "Detailed Comparison", "Raw Data"])
|
| 363 |
+
|
| 364 |
+
with viz_tab1:
|
| 365 |
+
st.plotly_chart(fig_radar, use_container_width=True)
|
| 366 |
+
st.markdown("**Interpretation:** The radar chart shows the relative performance of each experiment across all RAGAS metrics. Larger areas indicate better overall performance.")
|
| 367 |
+
|
| 368 |
+
with viz_tab2:
|
| 369 |
+
st.plotly_chart(fig_bar, use_container_width=True)
|
| 370 |
+
st.markdown("**Interpretation:** The bar charts show detailed performance with error bars indicating standard deviation across test cases.")
|
| 371 |
+
|
| 372 |
+
with viz_tab3:
|
| 373 |
+
st.dataframe(results_df, use_container_width=True)
|
| 374 |
+
|
| 375 |
+
# Download options
|
| 376 |
+
csv = results_df.to_csv(index=False)
|
| 377 |
+
st.download_button(
|
| 378 |
+
label="📥 Download Raw Results (CSV)",
|
| 379 |
+
data=csv,
|
| 380 |
+
file_name=f"ablation_study_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
|
| 381 |
+
mime="text/csv"
|
| 382 |
+
)
|
| 383 |
+
|
| 384 |
+
except Exception as e:
|
| 385 |
+
st.error(f"❌ Error running ablation study: {str(e)}")
|
| 386 |
+
st.exception(e)
|
| 387 |
+
|
| 388 |
+
with tabs[1]:
|
| 389 |
+
st.header("Model Comparison")
|
| 390 |
+
st.write("Compare different LLM models on the same test dataset.")
|
| 391 |
+
|
| 392 |
+
selected_models = st.multiselect(
|
| 393 |
+
"Select models to compare",
|
| 394 |
+
list(SUPPORTED_MODELS.keys()),
|
| 395 |
+
default=list(SUPPORTED_MODELS.keys())[:2]
|
| 396 |
+
)
|
| 397 |
+
|
| 398 |
+
num_questions_comp = st.number_input("Number of questions for comparison",
|
| 399 |
+
min_value=5, max_value=30, value=10)
|
| 400 |
+
|
| 401 |
+
if selected_models and st.button("🔍 Run Model Comparison"):
|
| 402 |
+
st.info("Model comparison functionality can be extended here...")
|
| 403 |
+
progress_bar_comp = st.progress(0)
|
| 404 |
+
status_text_comp = st.empty()
|
| 405 |
+
|
| 406 |
+
with st.spinner("Comparing models..."):
|
| 407 |
+
# Create a simplified comparison focusing on generation quality
|
| 408 |
+
test_cases = create_test_dataset(limit=num_questions_comp)
|
| 409 |
+
|
| 410 |
+
comparison_results = []
|
| 411 |
+
total_comparisons = len(selected_models) * len(test_cases)
|
| 412 |
+
current_progress_comp = 0
|
| 413 |
+
|
| 414 |
+
for model_name in selected_models:
|
| 415 |
+
status_text_comp.text(f"Testing model: {model_name}")
|
| 416 |
+
|
| 417 |
+
for test_case in test_cases:
|
| 418 |
+
try:
|
| 419 |
+
# Generate response with current model
|
| 420 |
+
response = asyncio.run(generate_groq_response(
|
| 421 |
+
test_case["question"],
|
| 422 |
+
model_name=SUPPORTED_MODELS[model_name]["name"]
|
| 423 |
+
))
|
| 424 |
+
|
| 425 |
+
# Evaluate
|
| 426 |
+
evaluator = LocalMetricsEvaluator()
|
| 427 |
+
comparison_results.append({
|
| 428 |
+
"model": model_name,
|
| 429 |
+
"question": test_case["question"],
|
| 430 |
+
"answer": response,
|
| 431 |
+
"ground_truth": test_case["ground_truth"],
|
| 432 |
+
"disease": test_case["disease"],
|
| 433 |
+
"answer_relevancy": evaluator.evaluate_answer_relevancy(test_case["question"], response),
|
| 434 |
+
"answer_correctness": evaluator.evaluate_answer_correctness(response, test_case["ground_truth"])
|
| 435 |
+
})
|
| 436 |
+
|
| 437 |
+
current_progress_comp += 1
|
| 438 |
+
progress_bar_comp.progress(current_progress_comp / total_comparisons)
|
| 439 |
+
|
| 440 |
+
except Exception as e:
|
| 441 |
+
st.error(f"Error testing {model_name}: {e}")
|
| 442 |
+
continue
|
| 443 |
+
|
| 444 |
+
if comparison_results:
|
| 445 |
+
comp_df = pd.DataFrame(comparison_results)
|
| 446 |
+
|
| 447 |
+
# Summary by model
|
| 448 |
+
model_summary = comp_df.groupby('model').agg({
|
| 449 |
+
'answer_relevancy': 'mean',
|
| 450 |
+
'answer_correctness': 'mean'
|
| 451 |
+
}).round(3)
|
| 452 |
+
|
| 453 |
+
st.subheader("📊 Model Performance Summary")
|
| 454 |
+
st.dataframe(model_summary, use_container_width=True)
|
| 455 |
+
|
| 456 |
+
# Visualization
|
| 457 |
+
fig_model_comp = px.bar(
|
| 458 |
+
model_summary.reset_index(),
|
| 459 |
+
x='model',
|
| 460 |
+
y=['answer_relevancy', 'answer_correctness'],
|
| 461 |
+
title="Model Performance Comparison",
|
| 462 |
+
barmode='group'
|
| 463 |
+
)
|
| 464 |
+
st.plotly_chart(fig_model_comp, use_container_width=True)
|
| 465 |
+
|
| 466 |
+
# Store results
|
| 467 |
+
st.session_state['model_comparison_results'] = comp_df
|
| 468 |
+
|
| 469 |
+
with tabs[2]:
|
| 470 |
+
st.header("Error Analysis")
|
| 471 |
+
st.write("Analyze failure cases and performance patterns.")
|
| 472 |
+
|
| 473 |
+
if st.session_state['ablation_results'] is not None:
|
| 474 |
+
results_df = st.session_state['ablation_results']
|
| 475 |
+
|
| 476 |
+
# Find worst performing cases
|
| 477 |
+
st.subheader("Worst Performing Cases")
|
| 478 |
+
worst_cases = results_df.nsmallest(10, 'answer_correctness')[['question', 'answer', 'ground_truth', 'experiment', 'answer_correctness']]
|
| 479 |
+
st.dataframe(worst_cases, use_container_width=True)
|
| 480 |
+
|
| 481 |
+
# Performance by experiment
|
| 482 |
+
st.subheader("Performance Distribution")
|
| 483 |
+
fig_box = px.box(results_df, x='experiment', y='answer_correctness',
|
| 484 |
+
title="Answer Correctness Distribution by Experiment")
|
| 485 |
+
st.plotly_chart(fig_box, use_container_width=True)
|
| 486 |
+
else:
|
| 487 |
+
st.info("Run an ablation study first to see error analysis.")
|
| 488 |
+
|
| 489 |
+
with tabs[3]:
|
| 490 |
+
st.header("Export Results")
|
| 491 |
+
st.write("Export results for research papers and further analysis.")
|
| 492 |
+
|
| 493 |
+
if st.session_state['ablation_results'] is not None:
|
| 494 |
+
results_df = st.session_state['ablation_results']
|
| 495 |
+
|
| 496 |
+
# Generate summary report
|
| 497 |
+
report = f"""
|
| 498 |
+
# RAG System Ablation Study Report
|
| 499 |
+
|
| 500 |
+
**Date:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
|
| 501 |
+
**Total Experiments:** {len(results_df['experiment'].unique())}
|
| 502 |
+
**Total Test Cases:** {len(results_df)}
|
| 503 |
+
|
| 504 |
+
## Summary Statistics
|
| 505 |
+
|
| 506 |
+
{results_df.groupby('experiment').agg({
|
| 507 |
+
'answer_relevancy': ['mean', 'std'],
|
| 508 |
+
'faithfulness': ['mean', 'std'],
|
| 509 |
+
'answer_correctness': ['mean', 'std'],
|
| 510 |
+
'context_precision': ['mean', 'std'],
|
| 511 |
+
'context_recall': ['mean', 'std']
|
| 512 |
+
}).round(3).to_string()}
|
| 513 |
+
|
| 514 |
+
## Best Performing Configurations
|
| 515 |
+
|
| 516 |
+
- **Best Answer Relevancy:** {results_df.groupby('experiment')['answer_relevancy'].mean().idxmax()}
|
| 517 |
+
- **Best Faithfulness:** {results_df.groupby('experiment')['faithfulness'].mean().idxmax()}
|
| 518 |
+
- **Best Answer Correctness:** {results_df.groupby('experiment')['answer_correctness'].mean().idxmax()}
|
| 519 |
+
|
| 520 |
+
## Recommendations
|
| 521 |
+
|
| 522 |
+
Based on the ablation study results, we recommend...
|
| 523 |
+
[Add your analysis here]
|
| 524 |
+
"""
|
| 525 |
+
|
| 526 |
+
col1, col2 = st.columns(2)
|
| 527 |
+
with col1:
|
| 528 |
+
st.download_button(
|
| 529 |
+
label="📄 Download Report (Markdown)",
|
| 530 |
+
data=report,
|
| 531 |
+
file_name=f"ablation_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md",
|
| 532 |
+
mime="text/markdown"
|
| 533 |
+
)
|
| 534 |
+
|
| 535 |
+
with col2:
|
| 536 |
+
csv_data = results_df.to_csv(index=False)
|
| 537 |
+
st.download_button(
|
| 538 |
+
label="📊 Download Data (CSV)",
|
| 539 |
+
data=csv_data,
|
| 540 |
+
file_name=f"ablation_data_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
|
| 541 |
+
mime="text/csv"
|
| 542 |
+
)
|
| 543 |
+
else:
|
| 544 |
+
st.info("No results available for export. Run an ablation study first.")
|
| 545 |
+
|
| 546 |
# Database setup
|
| 547 |
conn = sqlite3.connect('users.db')
|
| 548 |
c = conn.cursor()
|
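The ablation loop above scores every (question, answer, context) triple with a LocalMetricsEvaluator that is defined elsewhere in app.py and does not appear in this diff. The sketch below shows only the interface those calls assume; the method names and argument order are taken from the hunk above, while the token-overlap scoring is an illustrative placeholder, not the evaluator the app actually uses.

# Hypothetical sketch of the evaluator interface assumed by the ablation code above.
class LocalMetricsEvaluator:
    @staticmethod
    def _overlap(a: str, b: str) -> float:
        # Jaccard overlap of lowercased tokens, clamped to [0, 1]
        ta, tb = set(a.lower().split()), set(b.lower().split())
        return len(ta & tb) / len(ta | tb) if ta | tb else 0.0

    def evaluate_answer_relevancy(self, question: str, answer: str) -> float:
        return self._overlap(question, answer)

    def evaluate_faithfulness(self, answer: str, context: str) -> float:
        return self._overlap(answer, context)

    def evaluate_answer_correctness(self, answer: str, ground_truth: str) -> float:
        return self._overlap(answer, ground_truth)

    def evaluate_context_precision(self, question: str, context: str) -> float:
        return self._overlap(question, context)

    def evaluate_context_recall(self, question: str, context: str, ground_truth: str) -> float:
        return self._overlap(context, ground_truth)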
@@ -1270,7 +1737,7 @@ if 'conversation_history' not in st.session_state:
| 1737 |
st.session_state.conversation_history = {}
|
| 1738 |
|
| 1739 |
# Load YOLOv8 model
|
| 1740 |
+
yolo_model = YOLO("/workspaces/codespaces-blank/Areo-AI/model/plantdoc_model_yolov8.pt")
|
| 1741 |
|
| 1742 |
def preprocess_image(image, target_size=(224, 224)):
|
| 1743 |
"""
|
@@ -1319,6 +1786,53 @@ def text_to_speech(text, voice="af_heart", language="en"):
| 1786 |
except Exception as e:
|
| 1787 |
st.error(f"Error generating speech: {str(e)}")
|
| 1788 |
return None
|
| 1789 |
+
|
| 1790 |
+
async def generate_rag_response_general(query, conversation_history=None):
|
| 1791 |
+
"""
|
| 1792 |
+
Generate a response using RAG for general questions (no specific detected diseases)
|
| 1793 |
+
"""
|
| 1794 |
+
# Retrieve relevant chunks based on the query
|
| 1795 |
+
relevant_chunks = retrieve_relevant_documents(query, [], top_k=5) # Empty disease list for general queries
|
| 1796 |
+
|
| 1797 |
+
# Build context from retrieved chunks
|
| 1798 |
+
context = "\n".join([chunk["text"] for chunk in relevant_chunks])
|
| 1799 |
+
|
| 1800 |
+
# Create a more general prompt for consultation
|
| 1801 |
+
consultation_prompt = f"""As an expert plant pathologist and agricultural consultant, please provide a comprehensive answer to the following question about crop diseases and plant health.
|
| 1802 |
+
|
| 1803 |
+
Context from knowledge base:
|
| 1804 |
+
{context}
|
| 1805 |
+
|
| 1806 |
+
Question: {query}
|
| 1807 |
+
|
| 1808 |
+
Please provide a detailed, practical response that includes:
|
| 1809 |
+
1. Direct answer to the question
|
| 1810 |
+
2. Relevant scientific background
|
| 1811 |
+
3. Practical recommendations
|
| 1812 |
+
4. Prevention strategies (if applicable)
|
| 1813 |
+
5. When to seek professional help (if applicable)
|
| 1814 |
+
|
| 1815 |
+
Make your response accessible to farmers and agricultural practitioners while maintaining scientific accuracy."""
|
| 1816 |
+
|
| 1817 |
+
# Generate response
|
| 1818 |
+
selected_model_name = SUPPORTED_MODELS[st.session_state.get('selected_model', 'llama-3.1-8b-instant')]["name"]
|
| 1819 |
+
response = await generate_groq_response(
|
| 1820 |
+
consultation_prompt,
|
| 1821 |
+
model_name=selected_model_name,
|
| 1822 |
+
conversation_history=conversation_history
|
| 1823 |
+
)
|
| 1824 |
+
|
| 1825 |
+
# Evaluate using local metrics (simplified for general consultation)
|
| 1826 |
+
evaluator = LocalMetricsEvaluator()
|
| 1827 |
+
ragas_result = {
|
| 1828 |
+
"answer_relevancy": evaluator.evaluate_answer_relevancy(query, response),
|
| 1829 |
+
"faithfulness": evaluator.evaluate_faithfulness(response, context),
|
| 1830 |
+
"answer_correctness": 0.8, # Placeholder since we don't have ground truth for general questions
|
| 1831 |
+
"context_precision": evaluator.evaluate_context_precision(query, context),
|
| 1832 |
+
"context_recall": 0.8 # Placeholder
|
| 1833 |
+
}
|
| 1834 |
+
|
| 1835 |
+
return response, relevant_chunks, ragas_result
|
| 1836 |
|
| 1837 |
async def generate_groq_response(prompt, model_name="mixtral-8x7b-32768", conversation_history=None):
|
| 1838 |
try:
|
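generate_rag_response_general in the hunk above, like the ablation variants earlier in this diff, relies on retrieve_relevant_documents, which is also defined outside this diff. The calls only reveal its signature (a query, a list of detected diseases, and top_k) and that each returned chunk carries "text" and "chunk_number" keys; the following self-contained sketch is a stand-in under those assumptions, with a toy keyword match in place of the real vector search.

# Hypothetical stand-in for the retriever the diff relies on; not the app's implementation.
from typing import Dict, List

_TOY_CORPUS = [
    "Apple scab is a fungal disease of apple caused by Venturia inaequalis.",
    "Late blight of tomato and potato is caused by Phytophthora infestans.",
    "Wheat rust produces orange to reddish-brown pustules on leaves and stems.",
]

def retrieve_relevant_documents(query: str, detected_diseases: List[str], top_k: int = 3) -> List[Dict]:
    # Rank toy documents by token overlap with the query plus any detected disease names.
    terms = set(query.lower().split()) | {d.lower() for d in detected_diseases}
    ranked = sorted(
        _TOY_CORPUS,
        key=lambda doc: len(terms & set(doc.lower().split())),
        reverse=True,
    )
    return [{"chunk_number": i + 1, "text": doc} for i, doc in enumerate(ranked[:top_k])]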
@@ -1431,13 +1945,27 @@ def inference(image):
| 1945 |
return infer, names_infer, classes, confidence_scores, bounding_boxes
|
| 1946 |
|
| 1947 |
# Streamlit application
|
| 1948 |
+
st.sidebar.markdown("---")
|
| 1949 |
+
st.sidebar.header("🔬 Research Tools")
|
| 1950 |
+
|
| 1951 |
+
# Page selection
|
| 1952 |
+
page_selection = st.sidebar.radio(
|
| 1953 |
+
"Navigate to:",
|
| 1954 |
+
["🏠 Main App", "🔬 Research Dashboard"],
|
| 1955 |
+
index=0
|
| 1956 |
+
)
|
| 1957 |
|
| 1958 |
+
if page_selection == "🔬 Research Dashboard":
|
| 1959 |
+
render_research_page()
|
| 1960 |
+
else:
|
| 1961 |
+
# Your existing main app code
|
| 1962 |
+
st.title("Interactive Crop Disease Detection and Analysis🌾🌿🥬☘️")
|
| 1963 |
+
st.write(f"Welcome, {st.session_state['username']}!😊")
|
| 1964 |
+
|
| 1965 |
+
# Logout button
|
| 1966 |
+
if st.button("Logout"):
|
| 1967 |
+
logout()
|
| 1968 |
+
st.rerun()
|
| 1969 |
|
| 1970 |
# Add sidebar for configuration
|
| 1971 |
with st.sidebar:
|
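The sidebar routing added above hands off to render_research_page, whose model pickers iterate over SUPPORTED_MODELS, another symbol defined outside this diff. A guess at its structure, consistent only with the lookups in this commit (list(SUPPORTED_MODELS.keys()), SUPPORTED_MODELS[...]["name"], and the 'llama-3.1-8b-instant' default key), is sketched below; the concrete entries are assumptions.

# Hypothetical shape of SUPPORTED_MODELS: model key -> config dict with the Groq model id under "name".
SUPPORTED_MODELS = {
    "llama-3.1-8b-instant": {"name": "llama-3.1-8b-instant"},
    "mixtral-8x7b-32768": {"name": "mixtral-8x7b-32768"},
}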
@@ -1496,165 +2024,344 @@ language = st.selectbox(
| 2024 |
help="Select your preferred language"
|
| 2025 |
)
|
| 2026 |
|
| 2027 |
+
tab1, tab2 = st.tabs(["🖼️ Image Analysis", "💬 General Consultation"])
|
| 2028 |
|
| 2029 |
+
with tab1:
|
| 2030 |
+
st.header("Image-Based Disease Detection")
|
| 2031 |
+
st.write("Upload images of your crops to detect diseases and get specific analysis.")
|
| 2032 |
+
|
| 2033 |
+
# Main content - Image upload and analysis
|
| 2034 |
+
uploaded_files = st.file_uploader("Upload images for disease detection", type=["jpg", "jpeg", "png"], accept_multiple_files=True)
|
| 2035 |
+
|
| 2036 |
+
if uploaded_files:
|
| 2037 |
+
for uploaded_file in uploaded_files:
|
| 2038 |
+
file_id = uploaded_file.name
|
| 2039 |
+
|
| 2040 |
+
# Initialize conversation history for this image if it doesn't exist
|
| 2041 |
+
if file_id not in st.session_state.conversation_history:
|
| 2042 |
+
st.session_state.conversation_history[file_id] = []
|
| 2043 |
+
|
| 2044 |
+
st.subheader(f"Analysis for {file_id}")
|
| 2045 |
+
|
| 2046 |
+
# Create columns for side-by-side display
|
| 2047 |
+
col1, col2 = st.columns(2)
|
| 2048 |
+
|
| 2049 |
+
# Process image
|
| 2050 |
+
file_bytes = np.asarray(bytearray(uploaded_file.read()), dtype=np.uint8)
|
| 2051 |
+
image = cv2.imdecode(file_bytes, 1)
|
| 2052 |
+
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
|
| 2053 |
+
|
| 2054 |
+
# Display original image
|
| 2055 |
+
with col1:
|
| 2056 |
+
st.subheader("Original Image")
|
| 2057 |
+
st.image(image)
|
| 2058 |
+
|
| 2059 |
+
# Process and display results
|
| 2060 |
+
with st.spinner("Processing image..."):
|
| 2061 |
+
infer_image, classes_in_image, classes_in_dataset, confidences, boxes = inference(image)
|
| 2062 |
+
|
| 2063 |
+
with col2:
|
| 2064 |
+
st.subheader("Detected Diseases")
|
| 2065 |
+
st.image(infer_image)
|
| 2066 |
+
|
| 2067 |
+
# Display detection details
|
| 2068 |
+
if show_confidence:
|
| 2069 |
+
st.subheader("Detection Details")
|
| 2070 |
+
for cls, conf in zip(classes_in_image, confidences):
|
| 2071 |
+
st.write(f"- {classes_in_dataset[cls]}: {conf:.2%} confidence")
|
| 2072 |
+
|
| 2073 |
+
# Display conversation history for this image
|
| 2074 |
+
if st.session_state.conversation_history[file_id]:
|
| 2075 |
+
st.subheader("Conversation History")
|
| 2076 |
+
for i, entry in enumerate(st.session_state.conversation_history[file_id]):
|
| 2077 |
+
question, response = entry[:2]
|
| 2078 |
|
| 2079 |
+
with st.expander(f"Q{i+1}: {question[:50]}...", expanded=False):
|
| 2080 |
+
st.write("**Question:**", question)
|
| 2081 |
+
st.write("**Response:**", response)
|
| 2082 |
+
|
| 2083 |
+
# Display feedback buttons and handle comment collection
|
| 2084 |
+
display_feedback_buttons(file_id, i, question, response)
|
| 2085 |
+
|
| 2086 |
+
# Audio playback option
|
| 2087 |
+
if st.button("🔊 Listen", key=f"listen_history_{file_id}_{i}"):
|
| 2088 |
+
with st.spinner("Generating audio..."):
|
| 2089 |
+
audio_bytes = text_to_speech(response, voice=selected_voice)
|
| 2090 |
+
if audio_bytes:
|
| 2091 |
+
st.audio(audio_bytes, format="audio/wav")
|
| 2092 |
+
|
| 2093 |
+
# User input for questions about the detected diseases
|
| 2094 |
+
st.subheader("Ask Questions About Detected Diseases")
|
| 2095 |
+
user_text = st.text_area(
|
| 2096 |
+
"Enter your question about the detected diseases:",
|
| 2097 |
+
placeholder="Example: What are the best treatment options for these diseases? What preventive measures should I take?",
|
| 2098 |
+
key=f"question_{file_id}"
|
| 2099 |
+
)
|
| 2100 |
+
|
| 2101 |
+
if st.button("Get Analysis", key=f"analyze_{file_id}"):
|
| 2102 |
+
with st.spinner(f"Generating analysis using {selected_model}..."):
|
| 2103 |
+
# Translate user input
|
| 2104 |
+
translated_input = asyncio.run(translator.translate(user_text, dest='en')).text
|
| 2105 |
+
st.write(f"Translated Input (to English): {translated_input}")
|
| 2106 |
+
|
| 2107 |
+
# Extract detected disease names
|
| 2108 |
+
detected_classes = [classes_in_dataset[cls] for cls in classes_in_image]
|
| 2109 |
|
| 2110 |
+
# Fetch reference answers for detected diseases
|
| 2111 |
+
reference_answers = []
|
| 2112 |
+
for disease_name in detected_classes:
|
| 2113 |
+
reference_answer = get_reference_answer(disease_name)
|
| 2114 |
+
if reference_answer:
|
| 2115 |
+
reference_answers.append(reference_answer)
|
| 2116 |
+
|
| 2117 |
+
# Combine reference answers into a single string
|
| 2118 |
+
reference_answer = "\n".join(reference_answers) if reference_answers else None
|
| 2119 |
+
|
| 2120 |
+
# Generate response with RAG
|
| 2121 |
+
response, relevant_chunks, ragas_result = asyncio.run(generate_rag_response(
|
| 2122 |
+
translated_input,
|
| 2123 |
+
st.session_state.conversation_history[file_id],
|
| 2124 |
+
reference_answer # Pass the reference answer for evaluation
|
| 2125 |
+
))
|
| 2126 |
+
print("Response:", response)
|
| 2127 |
+
|
| 2128 |
+
if response is None:
|
| 2129 |
+
st.error("Failed to generate a response. Please try again.")
|
| 2130 |
+
response = "No response generated."
|
| 2131 |
+
|
| 2132 |
+
# Move the translate function call here
|
| 2133 |
+
if response:
|
| 2134 |
+
try:
|
| 2135 |
+
translated_response = asyncio.run(translator.translate(response, dest=language)).text
|
| 2136 |
+
except Exception as e:
|
| 2137 |
+
st.error(f"Translation failed: {e}")
|
| 2138 |
+
translated_response = response # Fallback to the original response
|
| 2139 |
+
else:
|
| 2140 |
+
translated_response = response
|
| 2141 |
+
|
| 2142 |
+
st.session_state.conversation_history[file_id].append((user_text, translated_response, None))
|
| 2143 |
+
|
| 2144 |
+
# Display the response and evaluation metrics
|
| 2145 |
+
#st.markdown("### Relevant Information")
|
| 2146 |
+
#for chunk in relevant_chunks:
|
| 2147 |
+
# st.write(f"- **Chunk {chunk['chunk_number']}**: {chunk['text']}")
|
| 2148 |
+
|
| 2149 |
+
st.markdown(response)
|
| 2150 |
+
|
| 2151 |
+
# Add audio playback option for the latest response
|
| 2152 |
+
col1, col2 = st.columns([1, 4])
|
| 2153 |
+
with col1:
|
| 2154 |
+
if st.button("🔊 Listen", key=f"listen_latest_{file_id}"):
|
| 2155 |
+
with st.spinner("Generating audio..."):
|
| 2156 |
+
audio_bytes = text_to_speech(response, language)
|
| 2157 |
+
if audio_bytes:
|
| 2158 |
+
st.audio(audio_bytes, format='audio/mp3')
|
| 2159 |
+
|
| 2160 |
+
with tab2:
|
| 2161 |
+
st.header("General Disease Consultation")
|
| 2162 |
+
st.write("Ask questions about crop diseases without uploading images. Get expert advice on plant pathology topics.")
|
| 2163 |
+
|
| 2164 |
+
# Initialize general consultation history
|
| 2165 |
+
if 'general_consultation' not in st.session_state.conversation_history:
|
| 2166 |
+
st.session_state.conversation_history['general_consultation'] = []
|
| 2167 |
+
|
| 2168 |
+
# Disease selection helper
|
| 2169 |
+
st.subheader("🎯 Quick Disease Lookup")
|
| 2170 |
+
col1, col2 = st.columns([2, 1])
|
| 2171 |
+
|
| 2172 |
+
with col1:
|
| 2173 |
+
# Get list of diseases from database for quick selection
|
| 2174 |
+
try:
|
| 2175 |
+
conn = sqlite3.connect('./db/disease_knowledge_base.db')
|
| 2176 |
+
c = conn.cursor()
|
| 2177 |
+
c.execute("SELECT DISTINCT name FROM diseases ORDER BY name")
|
| 2178 |
+
available_diseases = [row[0] for row in c.fetchall()]
|
| 2179 |
+
conn.close()
|
| 2180 |
+
except:
|
| 2181 |
+
available_diseases = ["Corn Leaf Blight", "Apple Scab", "Tomato Late Blight", "Wheat Rust"]
|
| 2182 |
|
| 2183 |
+
selected_disease = st.selectbox(
|
| 2184 |
+
"Select a specific disease for quick information:",
|
| 2185 |
+
[""] + available_diseases,
|
| 2186 |
+
help="Choose a disease to get instant information about it"
|
| 2187 |
)
|
| 2188 |
+
|
| 2189 |
+
with col2:
|
| 2190 |
+
if selected_disease and st.button("Get Disease Info", key="quick_disease_info"):
|
| 2191 |
+
with st.spinner("Retrieving disease information..."):
|
| 2192 |
+
quick_query = f"Tell me about {selected_disease} - its causes, symptoms, and treatment options."
|
| 2193 |
+
|
| 2194 |
+
# Generate response using RAG
|
| 2195 |
+
response, relevant_chunks, ragas_result = asyncio.run(generate_rag_response_general(
|
| 2196 |
+
quick_query,
|
| 2197 |
+
st.session_state.conversation_history['general_consultation']
|
| 2198 |
+
))
|
| 2199 |
+
|
| 2200 |
+
# Translate if needed
|
| 2201 |
+
if language != 'en':
|
| 2202 |
+
try:
|
| 2203 |
+
translated_response = translator.translate(response, dest=language).text
|
| 2204 |
+
except:
|
| 2205 |
+
translated_response = response
|
| 2206 |
+
else:
|
| 2207 |
+
translated_response = response
|
| 2208 |
|
| 2209 |
+
# Add to conversation history
|
| 2210 |
+
st.session_state.conversation_history['general_consultation'].append((quick_query, translated_response))
|
| 2211 |
|
| 2212 |
+
st.markdown("### Disease Information")
|
| 2213 |
+
st.markdown(translated_response)
|
| 2214 |
+
|
| 2215 |
+
# Audio option
|
| 2216 |
+
if st.button("🔊 Listen to Response", key="listen_quick_disease"):
|
| 2217 |
+
with st.spinner("Generating audio..."):
|
| 2218 |
+
audio_bytes = text_to_speech(translated_response, voice=selected_voice)
|
| 2219 |
+
if audio_bytes:
|
| 2220 |
+
st.audio(audio_bytes, format="audio/wav")
|
| 2221 |
+
|
| 2222 |
+
# General question input
|
| 2223 |
+
st.subheader("💡 Ask Any Question About Crop Diseases")
|
| 2224 |
+
|
| 2225 |
+
# Provide example questions
|
| 2226 |
+
example_questions = [
|
| 2227 |
+
"What are the most common fungal diseases in tomatoes?",
|
| 2228 |
+
"How can I prevent wheat rust in my field?",
|
| 2229 |
+
"What's the difference between bacterial and viral plant diseases?",
|
| 2230 |
+
"Which organic treatments work best for aphid control?",
|
| 2231 |
+
"What are the early signs of nutrient deficiency in corn?",
|
| 2232 |
+
"How do weather conditions affect plant disease development?",
|
| 2233 |
+
]
|
| 2234 |
+
|
| 2235 |
+
with st.expander("💡 Example Questions", expanded=False):
|
| 2236 |
+
for i, example in enumerate(example_questions):
|
| 2237 |
+
if st.button(example, key=f"example_{i}"):
|
| 2238 |
+
st.session_state[f"general_question_input"] = example
|
| 2239 |
+
|
| 2240 |
+
general_question = st.text_area(
|
| 2241 |
+
"Enter your question about crop diseases, plant pathology, or agricultural practices:",
|
| 2242 |
+
placeholder="Example: What are the most effective organic methods to control powdery mildew in grapes?",
|
| 2243 |
+
key="general_question_input",
|
| 2244 |
+
height=100
|
| 2245 |
+
)
|
| 2246 |
+
|
| 2247 |
+
# Topic categories for better organization
|
| 2248 |
+
st.subheader("🏷️ Question Categories")
|
| 2249 |
+
col1, col2, col3 = st.columns(3)
|
| 2250 |
+
|
| 2251 |
+
with col1:
|
| 2252 |
+
if st.button("🦠 Disease Identification", key="cat_identification"):
|
| 2253 |
+
st.session_state["general_question_input"] = "How can I identify different types of plant diseases based on symptoms?"
|
| 2254 |
+
|
| 2255 |
+
with col2:
|
| 2256 |
+
if st.button("💊 Treatment Options", key="cat_treatment"):
|
| 2257 |
+
st.session_state["general_question_input"] = "What are the most effective treatment options for fungal plant diseases?"
|
| 2258 |
+
|
| 2259 |
+
with col3:
|
| 2260 |
+
if st.button("🛡️ Prevention Methods", key="cat_prevention"):
|
| 2261 |
+
st.session_state["general_question_input"] = "What preventive measures can I take to protect my crops from diseases?"
|
| 2262 |
+
|
| 2263 |
+
if st.button("Get Expert Answer", key="general_analyze", type="primary"):
|
| 2264 |
+
if general_question.strip():
|
| 2265 |
+
with st.spinner(f"Consulting plant pathology expert using {selected_model}..."):
|
| 2266 |
+
# Translate user input if needed
|
| 2267 |
+
if language != 'en':
|
| 2268 |
+
try:
|
| 2269 |
+
translated_input = translator.translate(general_question, dest='en').text
|
| 2270 |
+
st.info(f"Translated to English: {translated_input}")
|
| 2271 |
+
except:
|
| 2272 |
+
translated_input = general_question
|
| 2273 |
+
else:
|
| 2274 |
+
translated_input = general_question
|
| 2275 |
|
| 2276 |
+
# Generate response using RAG for general consultation
|
| 2277 |
+
response, relevant_chunks, ragas_result = asyncio.run(generate_rag_response_general(
|
| 2278 |
translated_input,
|
| 2279 |
+
st.session_state.conversation_history['general_consultation']
|
| 2280 |
))
|
| 2281 |
|
| 2282 |
if response:
|
| 2283 |
+
# Translate response back to user's language
|
| 2284 |
+
if language != 'en':
|
| 2285 |
+
try:
|
| 2286 |
+
translated_response = translator.translate(response, dest=language).text
|
| 2287 |
+
except Exception as e:
|
| 2288 |
+
st.error(f"Translation failed: {e}")
|
| 2289 |
+
translated_response = response
|
| 2290 |
+
else:
|
| 2291 |
+
translated_response = response
|
| 2292 |
|
| 2293 |
+
# Add to conversation history
|
| 2294 |
+
st.session_state.conversation_history['general_consultation'].append((general_question, translated_response))
|
| 2295 |
|
| 2296 |
+
# Display response
|
| 2297 |
+
st.markdown("### Expert Response")
|
| 2298 |
+
st.markdown(translated_response)
|
| 2299 |
+
|
| 2300 |
+
# Show relevant sources if available
|
| 2301 |
+
if relevant_chunks:
|
| 2302 |
+
with st.expander("📚 Information Sources", expanded=False):
|
| 2303 |
+
for i, chunk in enumerate(relevant_chunks[:3]): # Show top 3 sources
|
| 2304 |
+
st.write(f"**Source {i+1}:** {chunk['text'][:200]}...")
|
| 2305 |
|
| 2306 |
+
# Audio playback option
|
| 2307 |
+
col1, col2 = st.columns([1, 4])
|
| 2308 |
+
with col1:
|
| 2309 |
+
if st.button("🔊 Listen", key="listen_general_latest"):
|
| 2310 |
+
with st.spinner("Generating audio..."):
|
| 2311 |
+
audio_bytes = text_to_speech(translated_response, voice=selected_voice)
|
| 2312 |
+
if audio_bytes:
|
| 2313 |
+
st.audio(audio_bytes, format="audio/wav")
|
| 2314 |
else:
|
| 2315 |
+
st.error("Failed to generate a response. Please try again.")
|
| 2316 |
+
else:
|
| 2317 |
+
st.warning("Please enter a question before submitting.")
|
| 2318 |
+
|
| 2319 |
+
# Display general consultation history
|
| 2320 |
+
if st.session_state.conversation_history['general_consultation']:
|
| 2321 |
+
st.subheader("📝 Consultation History")
|
| 2322 |
+
for i, entry in enumerate(st.session_state.conversation_history['general_consultation']):
|
| 2323 |
+
question, response = entry[:2]
|
| 2324 |
+
|
| 2325 |
+
with st.expander(f"Q{i+1}: {question[:60]}...", expanded=False):
|
| 2326 |
+
st.write("**Question:**", question)
|
| 2327 |
+
st.write("**Response:**", response)
|
| 2328 |
|
| 2329 |
+
# Feedback buttons for general consultation
|
| 2330 |
+
display_feedback_buttons('general_consultation', i, question, response)
|
| 2331 |
+
|
| 2332 |
+
# Audio playback for history
|
| 2333 |
+
if st.button("🔊 Listen", key=f"listen_general_history_{i}"):
|
| 2334 |
+
with st.spinner("Generating audio..."):
|
| 2335 |
+
audio_bytes = text_to_speech(response, voice=selected_voice)
|
| 2336 |
+
if audio_bytes:
|
| 2337 |
+
st.audio(audio_bytes, format="audio/wav")
|
| 2338 |
+
|
| 2339 |
+
# Export general consultation
|
| 2340 |
+
if st.session_state.conversation_history['general_consultation']:
|
| 2341 |
+
if st.button("📄 Export Consultation", key="export_general"):
|
| 2342 |
+
consultation_text = f"""
|
| 2343 |
+
# General Crop Disease Consultation Report
|
| 2344 |
+
|
| 2345 |
+
## Consultation Information
|
| 2346 |
+
- Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
|
| 2347 |
+
- Language: {language}
|
| 2348 |
+
- Model Used: {selected_model}
|
| 2349 |
+
|
| 2350 |
+
## Consultation History
|
| 2351 |
+
"""
|
| 2352 |
+
|
| 2353 |
+
for i, entry in enumerate(st.session_state.conversation_history['general_consultation']):
|
| 2354 |
+
question, response = entry[:2]
|
| 2355 |
+
consultation_text += f"\n### Question {i+1}:\n{question}\n\n### Expert Response {i+1}:\n{response}\n\n---\n"
|
| 2356 |
|
| 2357 |
st.download_button(
|
| 2358 |
+
label="📥 Download Consultation Report",
|
| 2359 |
+
data=consultation_text,
|
| 2360 |
+
file_name=f"crop_disease_consultation_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md",
|
| 2361 |
+
mime="text/markdown",
|
| 2362 |
+
key="download_general"
|
| 2363 |
)
|
| 2364 |
+
|
| 2365 |
# Add a footer with clear instructions
|
| 2366 |
st.markdown("""
|
| 2367 |
---
|