jdesiree commited on
Commit
766d00f
·
verified ·
1 Parent(s): 88b796b

Major Update

Browse files

Changed to an Any-to-Any model, Qwen2.5-Omni.
- Removed mock-streaming, as Omni has this feature.
- Made necessary adjustments to implement Omni
- Added toggle button for voice responses
- Removed now unused imports
- Increased token limit to match smart truncation
- Reinforced JSON file format for Graph tool

Files changed (1) hide show
  1. app.py +148 -61
app.py CHANGED
@@ -9,12 +9,16 @@ import re
9
  import requests
10
  from langchain.tools import BaseTool
11
  from langchain.agents import initialize_agent, AgentType
12
- from langchain_community.llms import HuggingFaceHub
13
  from langchain.memory import ConversationBufferWindowMemory
14
- from langchain.prompts import PromptTemplate
15
- from langchain.schema import SystemMessage, HumanMessage, AIMessage
 
16
  from pydantic import BaseModel, Field
17
- from typing import Type, Optional
 
 
 
 
18
 
19
  # --- Environment and Logging Setup ---
20
  logging.basicConfig(level=logging.INFO)
@@ -30,7 +34,7 @@ metrics_tracker = EduBotMetrics(save_file="edu_metrics.json")
30
  # --- LangChain Tool Definition ---
31
  class GraphInput(BaseModel):
32
  data_json: str = Field(description="JSON string of data for the graph")
33
- labels_json: str = Field(description="JSON string of labels for the graph")
34
  plot_type: str = Field(description="Type of plot: bar, line, or pie")
35
  title: str = Field(description="Title for the graph")
36
  x_label: str = Field(description="X-axis label", default="")
@@ -38,12 +42,42 @@ class GraphInput(BaseModel):
38
 
39
  class CreateGraphTool(BaseTool):
40
  name: str = "create_graph"
41
- description: str = """Generates a plot (bar, line, or pie) and returns it as an HTML-formatted Base64-encoded image string. Use this tool when teaching concepts that benefit from visual representation, such as: statistical distributions, mathematical functions, data comparisons, survey results, grade analyses, scientific relationships, economic models, or any quantitative information that would be clearer with a graph. The data and labels arguments must be JSON-encoded strings."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  args_schema: Type[BaseModel] = GraphInput
43
 
44
- def _run(self, data_json: str, labels_json: str, plot_type: str,
45
- title: str, x_label: str = "", y_label: str = "") -> str:
46
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  return generate_plot(
48
  data_json=data_json,
49
  labels_json=labels_json,
@@ -55,6 +89,7 @@ class CreateGraphTool(BaseTool):
55
  except Exception as e:
56
  return f"<p style='color:red;'>Error creating graph: {str(e)}</p>"
57
 
 
58
  # --- System Prompt ---
59
  SYSTEM_PROMPT = """You are EduBot, an expert multi-concept tutor designed to facilitate genuine learning and understanding. Your primary mission is to guide students through the learning process rather than providing direct answers to academic work.
60
 
@@ -126,32 +161,51 @@ def initialize_system_prompt(agent):
126
  agent.memory.chat_memory.add_message(system_message)
127
  system_prompt_initialized = True
128
 
129
- def create_langchain_agent():
130
- """Initialize LangChain agent with tools and memory."""
 
131
 
132
- # Initialize LLM
133
- llm = HuggingFaceHub(
134
- repo_id="Qwen/Qwen2.5-VL-7B-Instruct",
135
- huggingfacehub_api_token=hf_token,
136
- model_kwargs={
137
- "temperature": 0.7,
138
- "max_new_tokens": 1000,
139
- "top_p": 0.9,
140
- "return_full_text": False
141
- }
142
- )
143
 
144
- # Initialize tools
145
- tools = [CreateGraphTool()]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
 
147
- # Initialize memory
 
 
 
 
 
148
  memory = ConversationBufferWindowMemory(
149
  memory_key="chat_history",
150
  k=10,
151
  return_messages=True
152
  )
153
 
154
- # Create agent WITHOUT system prompt in prefix (we'll add it to memory instead)
155
  agent = initialize_agent(
156
  tools=tools,
157
  llm=llm,
@@ -174,6 +228,52 @@ def get_agent():
174
  agent = create_langchain_agent()
175
  return agent
176
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  # --- UI: MathJax Configuration ---
178
  mathjax_config = '''
179
  <script>
@@ -271,7 +371,6 @@ def chat_response(message, history=None):
271
  logger.info(f"Message type: {type(message)}")
272
  logger.info(f"Message content: {message}")
273
 
274
- # This line might be causing the issue
275
  try:
276
  metrics_tracker.log_interaction(message, "user_query", "chat_start")
277
  logger.info("Metrics interaction logged successfully")
@@ -308,36 +407,21 @@ def chat_response(message, history=None):
308
  logger.error(f"Full traceback: {traceback.format_exc()}")
309
  return f"I apologize, but I encountered an error while processing your message: {str(e)}"
310
 
311
- def respond_with_enhanced_streaming(message, history=None):
312
- """Enhanced streaming response function."""
313
- try:
314
- response = chat_response(message)
315
- yield response
316
- except Exception as e:
317
- logger.error(f"Error in streaming response: {e}")
318
- yield f"I apologize, but I encountered an error: {str(e)}"
319
-
320
- # --- UI: Event Handlers ---
321
- def respond_and_update(message, history):
322
  """Main function to handle user submission."""
323
  if not message.strip():
324
- return history, ""
325
 
326
  # Add user message to history
327
  history.append({"role": "user", "content": message})
328
- # Yield history to show the user message immediately, and clear the textbox
329
- yield history, ""
330
-
331
- # Stream the bot's response
332
- full_response = ""
333
- for response_chunk in respond_with_enhanced_streaming(message, history):
334
- full_response = response_chunk
335
- # Update the last message (bot's response)
336
- if len(history) > 0 and history[-1]["role"] == "user":
337
- history.append({"role": "assistant", "content": full_response})
338
- else:
339
- history[-1] = {"role": "assistant", "content": full_response}
340
- yield history, ""
341
 
342
  def clear_chat():
343
  """Clear the chat history and reset system prompt flag."""
@@ -347,7 +431,6 @@ def clear_chat():
347
  system_prompt_initialized = False
348
  return [], ""
349
 
350
-
351
  # --- UI: Interface Creation ---
352
  def create_interface():
353
  """Creates and configures the complete Gradio interface."""
@@ -358,9 +441,9 @@ def create_interface():
358
  with open("styles.css", "r", encoding="utf-8") as css_file:
359
  custom_css = css_file.read()
360
  except FileNotFoundError:
361
- logger.warning("style.css file not found, using default styling")
362
  except Exception as e:
363
- logger.warning(f"Error reading style.css: {e}")
364
 
365
  with gr.Blocks(
366
  title="EduBot",
@@ -405,14 +488,18 @@ def create_interface():
405
  with gr.Column(elem_classes=["button-column"], scale=1):
406
  send = gr.Button("Send", elem_classes=["send-button"], size="sm")
407
  clear = gr.Button("Clear", elem_classes=["clear-button"], size="sm")
408
-
409
- # Set up event handlers
410
- msg.submit(respond_and_update, [msg, chatbot], [chatbot, msg])
411
- send.click(respond_and_update, [msg, chatbot], [chatbot, msg])
412
- clear.click(clear_chat, outputs=[chatbot, msg])
413
 
414
- # Apply CSS at the very end for highest precedence
415
- gr.HTML(f'<style>{custom_css}</style>')
 
 
 
 
 
 
 
 
416
 
417
  return demo
418
 
 
9
  import requests
10
  from langchain.tools import BaseTool
11
  from langchain.agents import initialize_agent, AgentType
 
12
  from langchain.memory import ConversationBufferWindowMemory
13
+ from langchain.schema import SystemMessage
14
+ from langchain.llms.base import LLM
15
+ from typing import Optional, List, Any, Type
16
  from pydantic import BaseModel, Field
17
+ from transformers import Qwen2_5OmniForConditionalGeneration, Qwen2_5OmniProcessor
18
+ from qwen_omni_utils import process_mm_info
19
+ import soundfile as sf
20
+ import atexit
21
+ import glob
22
 
23
  # --- Environment and Logging Setup ---
24
  logging.basicConfig(level=logging.INFO)
 
34
  # --- LangChain Tool Definition ---
35
  class GraphInput(BaseModel):
36
  data_json: str = Field(description="JSON string of data for the graph")
37
+ labels_json: str = Field(description="JSON string of labels for the graph", default="[]")
38
  plot_type: str = Field(description="Type of plot: bar, line, or pie")
39
  title: str = Field(description="Title for the graph")
40
  x_label: str = Field(description="X-axis label", default="")
 
42
 
43
  class CreateGraphTool(BaseTool):
44
  name: str = "create_graph"
45
+ description: str = """Generates a plot (bar, line, or pie) and returns it as an HTML-formatted Base64-encoded image string. Use this tool when teaching concepts that benefit from visual representation, such as: statistical distributions, mathematical functions, data comparisons, survey results, grade analyses, scientific relationships, economic models, or any quantitative information that would be clearer with a graph.
46
+
47
+ REQUIRED FORMAT:
48
+ - data_json: A JSON dictionary where keys are category names and values are numbers
49
+ Example: '{"Math": 85, "Science": 92, "English": 78}'
50
+ - labels_json: A JSON list, only needed for pie charts if you want custom labels different from the data keys. For bar/line charts, use empty list: '[]'
51
+ Example for pie: '["Mathematics", "Science", "English Literature"]'
52
+ Example for bar/line: '[]'
53
+
54
+ EXAMPLES:
55
+ Bar chart: data_json='{"Q1": 1000, "Q2": 1200, "Q3": 950}', labels_json='[]'
56
+ Line chart: data_json='{"Jan": 100, "Feb": 120, "Mar": 110}', labels_json='[]'
57
+ Pie chart: data_json='{"A": 30, "B": 45, "C": 25}', labels_json='["Category A", "Category B", "Category C"]'
58
+
59
+ Always use proper JSON formatting with quotes around keys and string values."""
60
  args_schema: Type[BaseModel] = GraphInput
61
 
62
+ def _run(self, data_json: str, labels_json: str = "[]", plot_type: str = "bar",
63
+ title: str = "Chart", x_label: str = "", y_label: str = "") -> str:
64
  try:
65
+ # Validate JSON format before passing to generate_plot
66
+ import json
67
+ try:
68
+ data_parsed = json.loads(data_json)
69
+ labels_parsed = json.loads(labels_json)
70
+
71
+ # Validate data structure
72
+ if not isinstance(data_parsed, dict):
73
+ return "<p style='color:red;'>data_json must be a JSON dictionary with string keys and numeric values.</p>"
74
+
75
+ if not isinstance(labels_parsed, list):
76
+ return "<p style='color:red;'>labels_json must be a JSON list (use [] if no custom labels needed).</p>"
77
+
78
+ except json.JSONDecodeError as json_error:
79
+ return f"<p style='color:red;'>Invalid JSON format: {str(json_error)}. Ensure proper JSON formatting with quotes.</p>"
80
+
81
  return generate_plot(
82
  data_json=data_json,
83
  labels_json=labels_json,
 
89
  except Exception as e:
90
  return f"<p style='color:red;'>Error creating graph: {str(e)}</p>"
91
 
92
+
93
  # --- System Prompt ---
94
  SYSTEM_PROMPT = """You are EduBot, an expert multi-concept tutor designed to facilitate genuine learning and understanding. Your primary mission is to guide students through the learning process rather than providing direct answers to academic work.
95
 
 
161
  agent.memory.chat_memory.add_message(system_message)
162
  system_prompt_initialized = True
163
 
164
class Qwen25OmniLLM(LLM):
    """LangChain LLM wrapper around the Qwen2.5-Omni multimodal model.

    Loads the model and processor once at construction time and exposes a
    text-only ``_call`` interface so the model can be used with LangChain
    agents.
    """

    # Declared as ``Any`` so pydantic (backing LangChain's LLM base class)
    # does not attempt to validate the transformers objects.
    model: Any = None
    processor: Any = None

    def __init__(self, model_path: str = "Qwen/Qwen2.5-Omni-7B"):
        """Load the model weights and processor from *model_path*.

        ``torch_dtype="auto"`` and ``device_map="auto"`` let transformers
        choose precision and device placement for the available hardware.
        """
        super().__init__()
        self.model = Qwen2_5OmniForConditionalGeneration.from_pretrained(
            model_path,
            torch_dtype="auto",
            device_map="auto"
        )
        self.processor = Qwen2_5OmniProcessor.from_pretrained(model_path)

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        """Generate a text-only completion for *prompt*.

        NOTE(review): ``stop`` is accepted for LangChain interface
        compatibility but is not applied to generation.
        """
        # Text-only conversation. The global SYSTEM_PROMPT is injected on
        # every call here, in addition to any system message the agent
        # keeps in its memory.
        conversation = [
            {"role": "system", "content": [{"type": "text", "text": SYSTEM_PROMPT}]},
            {"role": "user", "content": [{"type": "text", "text": prompt}]}
        ]

        text = self.processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False)
        # No audio/images/videos are present in this conversation, but
        # process_mm_info produces the (empty) multimodal inputs in the
        # layout the processor expects.
        audios, images, videos = process_mm_info(conversation, use_audio_in_video=False)
        inputs = self.processor(text=text, audio=audios, images=images, videos=videos, return_tensors="pt", padding=True)
        inputs = inputs.to(self.model.device)

        # return_audio=False: request text tokens only, no speech output.
        text_ids = self.model.generate(**inputs, return_audio=False)
        # NOTE(review): decoding the raw generated ids may include the
        # prompt text as well as the completion — confirm against actual
        # model output and trim if needed.
        response = self.processor.batch_decode(text_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        return response

    @property
    def _llm_type(self) -> str:
        """Identifier LangChain uses to label this LLM implementation."""
        return "qwen25_omni"
196
 
197
+ def create_langchain_agent():
198
+ # Replace HuggingFaceHub with custom LLM
199
+ llm = Qwen25OmniLLM()
200
+
201
+ # Rest remains the same
202
+ tools = [CreateGraphTool()]
203
  memory = ConversationBufferWindowMemory(
204
  memory_key="chat_history",
205
  k=10,
206
  return_messages=True
207
  )
208
 
 
209
  agent = initialize_agent(
210
  tools=tools,
211
  llm=llm,
 
228
  agent = create_langchain_agent()
229
  return agent
230
 
231
def generate_voice_response(text_response: str, voice_enabled: bool = False) -> Optional[str]:
    """Generate audio response if voice is enabled.

    Synthesizes *text_response* as speech with the Qwen2.5-Omni model and
    writes it to a temporary WAV file.

    Args:
        text_response: The assistant's text reply to read aloud.
        voice_enabled: When False, skip synthesis entirely.

    Returns:
        Path to the written WAV file, or None when voice is disabled,
        the model appears not to support audio, or synthesis fails.
    """
    if not voice_enabled:
        return None

    try:
        # Reuse the agent's already-loaded model/processor rather than
        # loading a second copy of the weights.
        current_agent = get_agent()
        model = current_agent.llm.model
        processor = current_agent.llm.processor

        # NOTE(review): this only checks that ``generate`` exists and is a
        # Python-level callable with a __code__ attribute; it does not
        # actually verify audio-generation capability.
        if not hasattr(model, 'generate') or not hasattr(model.generate, '__code__'):
            logger.warning("Model may not support audio generation")
            return None

        conversation = [
            {"role": "system", "content": [{"type": "text", "text": "You are Qwen, a virtual human developed by the Qwen Team, Alibaba Group, capable of perceiving auditory and visual inputs, as well as generating text and speech."}]},
            {"role": "user", "content": [{"type": "text", "text": "Please read this response aloud: " + text_response}]}
        ]

        text = processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False)
        audios, images, videos = process_mm_info(conversation, use_audio_in_video=False)
        inputs = processor(text=text, audio=audios, images=images, videos=videos, return_tensors="pt", padding=True)
        inputs = inputs.to(model.device)

        # With speech output enabled, generate returns both the text token
        # ids and the audio waveform; only the audio is used here.
        text_ids, audio = model.generate(**inputs, speaker="Ethan")

        # Save audio to temporary file. The name pattern must stay in sync
        # with the temp_audio_*.wav glob used by cleanup_temp_audio().
        audio_path = f"temp_audio_{int(time.time())}.wav"
        # 24000 Hz is presumably the model's output sample rate — confirm
        # against the Qwen2.5-Omni documentation.
        sf.write(audio_path, audio.reshape(-1).detach().cpu().numpy(), samplerate=24000)
        return audio_path

    except Exception as e:
        logger.error(f"Error generating voice response: {e}")
        return None
265
+
266
def cleanup_temp_audio():
    """Remove temporary voice-response WAV files from the working directory.

    Deletes every file matching ``temp_audio_*.wav`` (the naming pattern
    used by generate_voice_response). Registered with atexit so leftovers
    are purged when the interpreter exits. Cleanup is best-effort: files
    that vanish or cannot be removed are skipped silently.
    """
    for file in glob.glob("temp_audio_*.wav"):
        try:
            os.remove(file)
        # Catch only filesystem errors — a bare ``except`` would also
        # swallow KeyboardInterrupt/SystemExit during shutdown.
        except OSError:
            pass

# Register cleanup function
atexit.register(cleanup_temp_audio)
276
+
277
  # --- UI: MathJax Configuration ---
278
  mathjax_config = '''
279
  <script>
 
371
  logger.info(f"Message type: {type(message)}")
372
  logger.info(f"Message content: {message}")
373
 
 
374
  try:
375
  metrics_tracker.log_interaction(message, "user_query", "chat_start")
376
  logger.info("Metrics interaction logged successfully")
 
407
  logger.error(f"Full traceback: {traceback.format_exc()}")
408
  return f"I apologize, but I encountered an error while processing your message: {str(e)}"
409
 
410
def respond_and_update(message, history, voice_enabled):
    """Main function to handle user submission.

    Generator used as a Gradio event handler. Each yield is a tuple of
    (chat history, textbox value, audio file path or None).

    Args:
        message: Raw text the user submitted.
        history: Chat history as a list of {"role", "content"} dicts;
            mutated in place.
        voice_enabled: When True, also synthesize a spoken response.
    """
    # BUG FIX: this function is a generator, so ``return history, "", None``
    # is silently discarded (it only sets StopIteration.value and Gradio
    # never receives it). Yield the unchanged state instead, then stop.
    if not message.strip():
        yield history, "", None
        return

    # Add user message to history and surface it immediately while the
    # model works; the textbox is cleared at the same time.
    history.append({"role": "user", "content": message})
    yield history, "", None

    # Generate response directly (no mock streaming)
    response = chat_response(message)
    audio_path = generate_voice_response(response, voice_enabled) if voice_enabled else None

    history.append({"role": "assistant", "content": response})
    yield history, "", audio_path
 
 
 
 
 
425
 
426
  def clear_chat():
427
  """Clear the chat history and reset system prompt flag."""
 
431
  system_prompt_initialized = False
432
  return [], ""
433
 
 
434
  # --- UI: Interface Creation ---
435
  def create_interface():
436
  """Creates and configures the complete Gradio interface."""
 
441
  with open("styles.css", "r", encoding="utf-8") as css_file:
442
  custom_css = css_file.read()
443
  except FileNotFoundError:
444
+ logger.warning("styles.css file not found, using default styling")
445
  except Exception as e:
446
+ logger.warning(f"Error reading styles.css: {e}")
447
 
448
  with gr.Blocks(
449
  title="EduBot",
 
488
  with gr.Column(elem_classes=["button-column"], scale=1):
489
  send = gr.Button("Send", elem_classes=["send-button"], size="sm")
490
  clear = gr.Button("Clear", elem_classes=["clear-button"], size="sm")
491
+ voice_toggle = gr.Checkbox(label="Enable Voice (Ethan)", value=False, elem_classes=["voice-toggle"])
 
 
 
 
492
 
493
+ # Add audio output component
494
+ audio_output = gr.Audio(label="Voice Response", visible=True, autoplay=True)
495
+
496
+ # Event handlers - INSIDE the Blocks context
497
+ msg.submit(respond_and_update, [msg, chatbot, voice_toggle], [chatbot, msg, audio_output])
498
+ send.click(respond_and_update, [msg, chatbot, voice_toggle], [chatbot, msg, audio_output])
499
+ clear.click(clear_chat, outputs=[chatbot, msg])
500
+
501
+ # Apply CSS at the very end
502
+ gr.HTML(f'<style>{custom_css}</style>')
503
 
504
  return demo
505