dicksinyass committed on
Commit e090a0a · verified · 1 Parent(s): 004447c

Update app.py

Files changed (1)
  1. app.py +125 -155
app.py CHANGED
@@ -3,7 +3,6 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, TextIter
  import random
  import threading
  import torch
- import os
  import time
  from typing import List, Dict, Generator, Tuple, Optional, Union
  import logging
@@ -19,7 +18,7 @@ from datetime import datetime
  # Set up logging
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
  logger = logging.getLogger(__name__)
- warnings.filterwarnings("ignore", message="torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly")
+ warnings.filterwarnings("ignore")

  # Enums and Data Classes
  class DebateStyle(str, Enum):
@@ -28,14 +27,14 @@ class DebateStyle(str, Enum):
  BALANCED = "Balanced"

  class OutputStyle(str, Enum):
- TRANSCRIPT = "Transcript (Markdown)"
- CHATBOT = "Chatbot (Chat History)"
+ TRANSCRIPT = "Transcript"
+ CHATBOT = "Chatbot"

  @dataclass
  class ModelInfo:
  id: str
  name: str
- required_memory: str # Estimated VRAM requirement
+ required_memory: str
  supports_quantization: bool = False
  quantization_config: Optional[Dict] = field(default_factory=dict)

@@ -118,15 +117,8 @@ class DebateHistoryManager:
  # Constants
  MODELS = [
  ModelInfo(
- "meta-llama/Meta-Llama-3-8B-Instruct",
- "Llama 3 8B Instruct",
- "16GB",
- True,
- {"load_in_4bit": True, "bnb_4bit_compute_dtype": torch.float16}
- ),
- ModelInfo(
- "Qwen/Qwen1.5-7B-Chat",
- "Qwen1.5 7B Chat",
+ "mistralai/Mistral-7B-Instruct-v0.2",
+ "Mistral 7B Instruct",
  "14GB",
  True,
  {"load_in_4bit": True, "bnb_4bit_compute_dtype": torch.float16}
@@ -138,12 +130,12 @@ MODELS = [
  False
  ),
  ModelInfo(
- "mistralai/Mistral-7B-Instruct-v0.2",
- "Mistral 7B Instruct",
+ "Qwen/Qwen1.5-7B-Chat",
+ "Qwen1.5 7B Chat",
  "14GB",
  True,
  {"load_in_4bit": True, "bnb_4bit_compute_dtype": torch.float16}
- ),
+ )
  ]

  PERSONAS = [
@@ -167,13 +159,6 @@ PERSONAS = [
  traits="practical, solution-oriented, experienced",
  style="direct, concise, example-driven",
  emoji="🛠️"
- ),
- Persona(
- name="Dr. Emeka Okafor",
- description="A social scientist specializing in cultural perspectives.",
- traits="culturally aware, nuanced, community-focused",
- style="inclusive, storytelling, perspective-oriented",
- emoji="🌍"
  )
  ]

@@ -182,7 +167,6 @@ model_cache = {}
  current_device = None
  performance_monitor = ModelPerformance()

- # Core Functions
  def get_device() -> str:
  global current_device
  if current_device:
@@ -201,11 +185,10 @@ def get_device() -> str:

  def clear_model_cache():
  global model_cache
- for model_id in list(model_cache.keys()):
- del model_cache[model_id]
+ model_cache.clear()
  gc.collect()
- torch.cuda.empty_cache()
- model_cache = {}
+ if torch.cuda.is_available():
+ torch.cuda.empty_cache()
  logger.info("Model cache cleared")

  def load_model(model_info: ModelInfo) -> Tuple[pipeline, AutoTokenizer]:
@@ -217,13 +200,6 @@ def load_model(model_info: ModelInfo) -> Tuple[pipeline, AutoTokenizer]:
  device = get_device()
  kwargs = {"trust_remote_code": True}

- if device == "cuda":
- gpu_mem = torch.cuda.get_device_properties(0).total_memory / (1024**3)
- required_mem = float(model_info.required_memory.replace("GB", ""))
- if gpu_mem < required_mem and not model_info.supports_quantization:
- logger.warning(f"Insufficient GPU memory for {model_info.name} (needs {required_mem}GB, has {gpu_mem:.1f}GB)")
-
- # Handle quantization if supported and on CUDA
  if device == "cuda" and model_info.supports_quantization:
  kwargs.update(model_info.quantization_config)
  kwargs["device_map"] = "auto"
@@ -236,9 +212,6 @@ def load_model(model_info: ModelInfo) -> Tuple[pipeline, AutoTokenizer]:
  tokenizer = AutoTokenizer.from_pretrained(model_info.id)
  model = AutoModelForCausalLM.from_pretrained(model_info.id, **kwargs)

- if device == "cuda" and not model_info.supports_quantization:
- model = model.to(device)
-
  pipe = pipeline(
  "text-generation",
  model=model,
@@ -354,7 +327,6 @@ def stream_response(
  else:
  yield buffer.strip()

- # Record performance metrics
  performance_monitor.record_generation(
  pipe.model.config._name_or_path,
  time.time() - start_time,
@@ -380,7 +352,6 @@ def council_chat_stream(
  yield "Please enter a topic for debate."
  return

- # Convert string style to Enum if needed
  if isinstance(debate_style, str):
  try:
  debate_style = DebateStyle(debate_style)
@@ -398,10 +369,8 @@ def council_chat_stream(
  loaded_models = []
  for model_info in selected_model_infos:
  try:
- with gr.Progress() as progress:
- progress(0, desc=f"Loading {model_info.name}")
- pipe, tokenizer = load_model(model_info)
- loaded_models.append((pipe, tokenizer, model_info))
+ pipe, tokenizer = load_model(model_info)
+ loaded_models.append((pipe, tokenizer, model_info))
  except Exception as e:
  logger.error(f"Skipping {model_info.name}: {str(e)}")
  yield f"⚠️ Couldn't load {model_info.name}, skipping..."
@@ -424,10 +393,7 @@ def council_chat_stream(
  display_name = f"{persona.emoji} {persona.name} ({model_info.name})"
  participant_names.append(display_name)

- thinking_msg = f"**{display_name}** is thinking..."
- current_output = "\n\n".join([f"**User:** {user_prompt}"] + formatted_responses + [thinking_msg])
- yield current_output
-
+ yield f"**{display_name}** is thinking..."
  prompt = create_debate_prompt(
  user_prompt,
  persona,
@@ -438,33 +404,26 @@ def council_chat_stream(
  full_response = ""
  for chunk in stream_response(pipe, tokenizer, prompt, display_name, temperature):
  full_response = chunk
- current_output = "\n\n".join([f"**User:** {user_prompt}"] + formatted_responses + [chunk])
- yield current_output
+ yield chunk

  persona_responses.append(f"{persona.name}: {full_response.split('**:')[-1].strip()}")
  formatted_responses.append(full_response)

- # Generate synthesis
- synth_pipe, synth_tokenizer, _ = random.choice(loaded_models)
+ synth_pipe, synth_tokenizer, _ = loaded_models[0]
  synth_prompt = create_synthesis_prompt(user_prompt, persona_responses)

- yield "\n\n".join([f"**User:** {user_prompt}"] + formatted_responses + ["✨ **Facilitator** is synthesizing..."])
-
- synthesis = ""
- for chunk in stream_response(synth_pipe, synth_tokenizer, synth_prompt, "✨ Facilitator", temperature):
- synthesis = chunk
- current_output = "\n\n".join([f"**User:** {user_prompt}"] + formatted_responses + [chunk])
- yield current_output
+ yield "✨ **Facilitator** is synthesizing..."
+ for chunk in stream_response(synth_pipe, synth_tokenizer, synth_prompt, "Facilitator", temperature):
+ yield chunk

  elapsed_time = time.time() - start_time
  transcript = (
  f"**User:** {user_prompt}\n\n" +
  "\n\n".join(formatted_responses) +
- f"\n\n{synthesis}\n\n" +
+ f"\n\n**Facilitator:** {chunk.split('**:')[-1].strip()}\n\n" +
  f"---\n*Debate completed in {elapsed_time:.1f} seconds*"
  )

- # Save to history
  if save_history:
  history_item = DebateHistoryItem(
  id=str(uuid.uuid4()),
@@ -478,104 +437,115 @@ def council_chat_stream(

  yield transcript

- def council_chat_stream_chatbot(
- user_prompt: str,
- num_members: int = 3,
- debate_style: Union[DebateStyle, str] = DebateStyle.BALANCED,
- temperature: float = 0.7,
- selected_models: Optional[List[str]] = None,
- continue_debate: bool = False,
- history: Optional[List[str]] = None,
- save_history: bool = True
- ) -> Generator[list, None, None]:
- chat_history = []
- for output in council_chat_stream(
- user_prompt, num_members, debate_style, temperature,
- selected_models, continue_debate, history, save_history
- ):
- chat_history.append((None, output))
- yield chat_history
-
- # UI Components
- def build_persona_card(persona: Persona) -> gr.Box:
- with gr.Box(elem_classes="member-card") as card:
- gr.Markdown(f"""
- <h3>{persona.emoji} {persona.name}</h3>
- <p><strong>Description:</strong> {persona.description}</p>
- <p><strong>Traits:</strong> {persona.traits}</p>
- <p><strong>Style:</strong> {persona.style}</p>
- """)
- return card
-
- def build_model_info_card(model: ModelInfo) -> gr.Box:
- with gr.Box(elem_classes="model-card") as card:
- gr.Markdown(f"""
- <h3>{model.name}</h3>
- <p><strong>ID:</strong> {model.id}</p>
- <p><strong>Memory Requirement:</strong> {model.required_memory}</p>
- <p><strong>Quantization:</strong> {'Supported' if model.supports_quantization else 'Not Supported'}</p>
- """)
- return card
-
- def build_history_item_ui(history_item: Dict) -> gr.Box:
- with gr.Box(elem_classes="history-item") as item:
- with gr.Row():
- with gr.Column(scale=3):
- gr.Markdown(f"**{history_item['topic']}**")
- gr.Markdown(f"*{datetime.fromtimestamp(history_item['timestamp']).strftime('%Y-%m-%d %H:%M:%S')}*")
- with gr.Column(scale=1):
- view_btn = gr.Button("View", size="sm")
- load_btn = gr.Button("Load", size="sm")
- return item, view_btn, load_btn
-
- # Gradio Interface
- def build_gradio_interface():
- custom_css = """
- .gradio-container { font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; }
- .member-card, .model-card, .history-item {
- border: 1px solid #e0e0e0;
- border-radius: 8px;
- padding: 15px;
- margin-bottom: 15px;
- background: #f9f9f9;
- }
- .member-card h3, .model-card h3 { margin-top: 0; color: #333; }
- #transcript-container { position: relative; max-height: 600px; overflow-y: auto; }
- #chatbot-container { max-height: 600px; }
- .stats-table { width: 100%; border-collapse: collapse; }
+ def create_interface():
+ css = """
+ .member-card { border: 1px solid #e0e0e0; border-radius: 8px; padding: 15px; margin: 10px; background: #f9f9f9; }
+ .member-card h3 { margin-top: 0; }
+ #debate-output { max-height: 500px; overflow-y: auto; padding: 10px; border: 1px solid #ddd; border-radius: 8px; }
+ .history-item { border: 1px solid #e0e0e0; border-radius: 8px; padding: 10px; margin: 5px 0; background: #f5f5f5; }
+ .stats-table { width: 100%; border-collapse: collapse; margin-top: 10px; }
  .stats-table th, .stats-table td { padding: 8px; text-align: left; border-bottom: 1px solid #ddd; }
  """

- with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as demo:
+ with gr.Blocks(css=css, theme=gr.themes.Soft()) as app:
  current_debate = gr.State([])
- current_history_id = gr.State(None)

- gr.Markdown("# 🏛️ AI Council Debate\n*Get diverse AI perspectives on any topic*")
+ gr.Markdown("# 🏛️ AI Council Debate")

  with gr.Row():
  with gr.Column(scale=2):
- # Debate Input Section
- with gr.Group():
- user_prompt = gr.Textbox(
- label="Debate Topic",
- placeholder="Enter your question or topic for debate...",
- lines=4,
- max_lines=6
- )
-
- with gr.Accordion("⚙️ Debate Settings", open=False):
- with gr.Row():
- num_members = gr.Slider(
- minimum=2,
- maximum=len(PERSONAS),
- value=3,
- step=1,
- label="Number of Council Members"
- )
- debate_style = gr.Radio(
- list(DebateStyle),
- value=DebateStyle.BALANCED,
- label="Debate Style"
- )
-
-
+ user_input = gr.Textbox(label="Debate Topic", lines=3)
+ with gr.Row():
+ num_members = gr.Slider(2, len(PERSONAS), value=3, step=1, label="Number of Members")
+ temperature = gr.Slider(0.1, 1.0, value=0.7, label="Creativity")
+ debate_style = gr.Radio(
+ list(DebateStyle),
+ value=DebateStyle.BALANCED,
+ label="Debate Style"
+ )
+ model_selection = gr.CheckboxGroup(
+ choices=[model.name for model in MODELS],
+ value=[model.name for model in MODELS],
+ label="Select Models"
+ )
+ with gr.Row():
+ submit_btn = gr.Button("Start Debate", variant="primary")
+ clear_btn = gr.Button("Clear", variant="secondary")
+ continue_btn = gr.Checkbox(label="Continue Debate", value=False)
+ save_history = gr.Checkbox(label="Save History", value=True)
+
+ with gr.Column(scale=3):
+ output = gr.HTML(elem_id="debate-output")
+
+ with gr.Accordion("👥 Council Members", open=False):
+ for persona in PERSONAS:
+ with gr.Group(elem_classes="member-card"):
+ gr.Markdown(f"""
+ <h3>{persona.emoji} {persona.name}</h3>
+ <p><strong>Description:</strong> {persona.description}</p>
+ <p><strong>Style:</strong> {persona.style}</p>
+ <p><strong>Traits:</strong> {persona.traits}</p>
+ """)
+
+ with gr.Accordion("📜 Debate History", open=False):
+ history_output = gr.Column()
+ refresh_history = gr.Button("Refresh History")
+
+ with gr.Accordion("📊 Performance Stats", open=False):
+ stats_output = gr.HTML()
+ refresh_stats = gr.Button("Refresh Stats")
+
+ def debate_wrapper(user_prompt, num_members, debate_style, temperature, model_selection, continue_debate, save_history, current_debate):
+ selected_models = [m.id for m in MODELS if m.name in model_selection]
+ return council_chat_stream(
+ user_prompt, num_members, debate_style, temperature,
+ selected_models, continue_debate, current_debate, save_history
+ )
+
+ def update_history(history, new_output):
+ if "Facilitator" in new_output:
+ return []
+ return history + [new_output] if history else [new_output]
+
+ def load_history():
+ history = DebateHistoryManager.load_history()
+ return [
+ gr.Group(elem_classes="history-item", visible=True, render=False) for _ in history
+ ]
+
+ def show_stats():
+ stats = "<table class='stats-table'><tr><th>Model</th><th>Calls</th><th>Avg Time</th><th>Tokens/s</th></tr>"
+ for model in MODELS:
+ data = performance_monitor.get_stats(model.id)
+ stats += f"""
+ <tr>
+ <td>{model.name}</td>
+ <td>{data['total_calls']}</td>
+ <td>{data['avg_time']:.2f}s</td>
+ <td>{data['tokens_per_second']:.1f}</td>
+ </tr>
+ """
+ stats += "</table>"
+ return stats
+
+ submit_btn.click(
+ debate_wrapper,
+ [user_input, num_members, debate_style, temperature, model_selection, continue_btn, save_history, current_debate],
+ output
+ ).then(
+ lambda x: x,
+ output,
+ current_debate,
+ preprocess=update_history
+ )
+
+ clear_btn.click(lambda: "", None, output)
+ refresh_history.click(load_history, None, history_output)
+ refresh_stats.click(show_stats, None, stats_output)
+
+ return app
+
+ if __name__ == "__main__":
+ get_device()
+ app = create_interface()
+ app.launch()