Spaces:

mgbam
/

NeuroResearch_AI

Running

App Files Files Community

mgbam committed on Mar 11, 2025

Commit

ddd0e04

verified ·

1 Parent(s): 3ae27aa

Update app.py

Browse files

Files changed (1) hide show

app.py +396 -248

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 # ------------------------------
-# Imports
 # ------------------------------
 from langchain_openai import OpenAIEmbeddings
 from langchain_community.vectorstores import Chroma
@@ -9,306 +9,454 @@ from langgraph.graph import END, StateGraph
 from langgraph.prebuilt import ToolNode
 from langgraph.graph.message import add_messages
 from typing_extensions import TypedDict, Annotated
-from typing import Sequence, List, Dict, Any
 import chromadb
 import re
 import os
 import streamlit as st
 import requests
-import time
-import hashlib
 from langchain.tools.retriever import create_retriever_tool
-from datetime import datetime
 # ------------------------------
-# Data
 # ------------------------------
 research_texts = [
     "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%",
     "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing",
-    "Latest Trends in Machine Learning Methods Using Quantum Computing",
-    "Advancements in Neuromorphic Computing for Energy-Efficient AI Systems",
-    "Cross-Modal Learning: Integrating Visual and Textual Representations for Multimodal AI"
 ]
 development_texts = [
     "Project A: UI Design Completed, API Integration in Progress",
     "Project B: Testing New Feature X, Bug Fixes Needed",
-    "Product Y: In the Performance Optimization Stage Before Release",
-    "Framework Z: Version 3.2 Released with Enhanced Distributed Training Support",
-    "DevOps Pipeline: Automated CI/CD Implementation for ML Model Deployment"
 ]
 # ------------------------------
-# Configuration
 # ------------------------------
-class AppConfig:
-    def __init__(self):
-        self.DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
-        self.CHROMA_PATH = "chroma_db"
-        self.MAX_RETRIES = 3
-        self.RETRY_DELAY = 1.5
-        self.DOCUMENT_CHUNK_SIZE = 300
-        self.DOCUMENT_OVERLAP = 50
-        self.SEARCH_K = 5
-        self.SEARCH_TYPE = "mmr"
-    def validate(self):
-        if not self.DEEPSEEK_API_KEY:
-            st.error("""
-            **Configuration Error**
-            🔑 Missing DeepSeek API key.
-            Configure through Hugging Face Space secrets:
-            1. Space Settings → Repository secrets
-            2. Add secret: DEEPSEEK_API_KEY=your_key
-            3. Rebuild Space
-            """)
-            st.stop()
 # ------------------------------
-# Chroma Setup
 # ------------------------------
-class ChromaManager:
-    def __init__(self, config: AppConfig):
-        os.makedirs(config.CHROMA_PATH, exist_ok=True)
-        self.client = chromadb.PersistentClient(path=config.CHROMA_PATH)
-        self.embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
-        self.research_collection = self._create_collection(
-            research_texts,
-            "research_collection",
-            {"category": "research"}
-        )
-        self.dev_collection = self._create_collection(
-            development_texts,
-            "development_collection",
-            {"category": "development"}
-        )
-    def _create_collection(self, documents: List[str], name: str, metadata: dict) -> Chroma:
-        splitter = RecursiveCharacterTextSplitter(
-            chunk_size=300,
-            chunk_overlap=50,
-            separators=["\n\n", "\n", "。"]
-        )
-        docs = splitter.create_documents(documents)
-        return Chroma.from_documents(
-            documents=docs,
-            embedding=self.embeddings,
-            client=self.client,
-            collection_name=name,
-            collection_metadata=metadata
-        )
 # ------------------------------
-# Document Processing
 # ------------------------------
-class DocumentProcessor:
-    @staticmethod
-    def deduplicate(docs: List[Any]) -> List[Any]:
-        seen = set()
-        return [doc for doc in docs
-                if not (hashlib.md5(doc.page_content.encode()).hexdigest() in seen
-                        or seen.add(hashlib.md5(doc.page_content.encode()).hexdigest()))]
-    @staticmethod
-    def extract_keypoints(docs: List[Any]) -> str:
-        categories = {
-            "quantum": ["quantum", "qubit"],
-            "vision": ["image", "recognition"],
-            "nlp": ["transformer", "language"]
-        }
-        return "\n".join(sorted({
-            "- " + {
-                "quantum": "Quantum computing breakthroughs",
-                "vision": "Computer vision advancements",
-                "nlp": "NLP architecture innovations"
-            }[cat]
-            for doc in docs
-            for cat, kw in categories.items()
-            if any(k in doc.page_content.lower() for k in kw)
-        }))
 # ------------------------------
-# Workflow State
 # ------------------------------
 class AgentState(TypedDict):
     messages: Annotated[Sequence[AIMessage | HumanMessage | ToolMessage], add_messages]
-# ------------------------------
-# Workflow Setup
-# ------------------------------
-class AgentWorkflow:
-    def __init__(self, chroma: ChromaManager):
-        self.chroma = chroma
-        self.workflow = StateGraph(AgentState)
-        # Define nodes
-        self.workflow.add_node("agent", self.agent)
-        self.workflow.add_node("retrieve", ToolNode([
-            create_retriever_tool(
-                chroma.research_collection.as_retriever(),
-                "research_tool",
-                "Search research documents"
-            ),
-            create_retriever_tool(
-                chroma.dev_collection.as_retriever(),
-                "dev_tool",
-                "Search development updates"
-            )
-        ]))
-        self.workflow.add_node("generate", self.generate)
-        self.workflow.add_node("rewrite", self.rewrite)
-        # Define edges
-        self.workflow.set_entry_point("agent")
-        self.workflow.add_conditional_edges(
-            "agent",
-            self._tools_condition,
-            {"retrieve": "retrieve", "end": END}
-        )
-        self.workflow.add_conditional_edges(
-            "retrieve",
-            self._grade_documents,
-            {"generate": "generate", "rewrite": "rewrite"}
         )
-        self.workflow.add_edge("generate", END)
-        self.workflow.add_edge("rewrite", "agent")
-        self.app = self.workflow.compile()
-    def agent(self, state: AgentState):
-        try:
-            messages = state["messages"]
-            query = messages[-1].content if isinstance(messages[-1], HumanMessage) else messages[-1]['content']
-            response = requests.post(
-                "https://api.deepseek.com/v1/chat/completions",
-                headers={"Authorization": f"Bearer {config.DEEPSEEK_API_KEY}"},
-                json={
-                    "model": "deepseek-chat",
-                    "messages": [{
-                        "role": "user",
-                        "content": f"""Analyze this query: "{query}"
-                        Respond EXACTLY as:
-                        - SEARCH_RESEARCH: <terms> (for research topics)
-                        - SEARCH_DEV: <terms> (for development updates)
-                        - DIRECT: <answer> (otherwise)"""
-                    }]
-                }
-            ).json()
-            content = response['choices'][0]['message']['content']
-            if "SEARCH_RESEARCH:" in content:
-                terms = content.split("SEARCH_RESEARCH:")[1].strip()
-                results = self.chroma.research_collection.similarity_search(terms)
-                return {"messages": [AIMessage(content=f"Research Results: {str(results)}")]}
-            elif "SEARCH_DEV:" in content:
-                terms = content.split("SEARCH_DEV:")[1].strip()
-                results = self.chroma.dev_collection.similarity_search(terms)
-                return {"messages": [AIMessage(content=f"Development Results: {str(results)}")]}
-            return {"messages": [AIMessage(content=content)]}
-        except Exception as e:
-            return {"messages": [AIMessage(content=f"Error: {str(e)}")]}
-    def generate(self, state: AgentState):
-        docs = eval(state["messages"][-1].content.split("Results: ")[1])
-        processed = "\n".join([d.page_content[:200] for d in DocumentProcessor.deduplicate(docs)])
         response = requests.post(
             "https://api.deepseek.com/v1/chat/completions",
-            headers={"Authorization": f"Bearer {config.DEEPSEEK_API_KEY}"},
-            json={
-                "model": "deepseek-chat",
-                "messages": [{
-                    "role": "user",
-                    "content": f"Summarize these findings:\n{processed}"
-                }]
-            }
-        ).json()
-        return {"messages": [AIMessage(content=response['choices'][0]['message']['content'])]}
-    def rewrite(self, state: AgentState):
-        original = state["messages"][0].content
         response = requests.post(
             "https://api.deepseek.com/v1/chat/completions",
-            headers={"Authorization": f"Bearer {config.DEEPSEEK_API_KEY}"},
-            json={
-                "model": "deepseek-chat",
-                "messages": [{
-                    "role": "user",
-                    "content": f"Rephrase this query: {original}"
-                }]
-            }
-        ).json()
-        return {"messages": [AIMessage(content=response['choices'][0]['message']['content'])]}
-    def _tools_condition(self, state: AgentState):
-        return "retrieve" if "Results:" in state["messages"][-1].content else "end"
-    def _grade_documents(self, state: AgentState):
-        return "generate" if len(eval(state["messages"][-1].content.split("Results: ")[1])) > 0 else "rewrite"
 # ------------------------------
-# Streamlit App
 # ------------------------------
-def apply_theme():
-    st.markdown("""
-    <style>
-    .stApp { background: #1a1a1a; color: white; }
-    .stTextArea textarea { background: #2d2d2d !important; color: white !important; }
-    .stButton>button { background: #2E86C1; transition: 0.3s; }
-    .stButton>button:hover { background: #1B4F72; transform: scale(1.02); }
-    .data-box { background: #2d2d2d; border-left: 4px solid #2E86C1; padding: 15px; margin: 10px 0; }
-    </style>
-    """, unsafe_allow_html=True)
-def main(config: AppConfig, chroma: ChromaManager):
-    apply_theme()
-    with st.sidebar:
-        st.header("📚 Databases")
-        with st.expander("Research", expanded=True):
-            for text in research_texts:
-                st.markdown(f'<div class="data-box">{text}</div>', unsafe_allow_html=True)
-        with st.expander("Development"):
-            for text in development_texts:
-                st.markdown(f'<div class="data-box">{text}</div>', unsafe_allow_html=True)
-    st.title("🔍 AI Research Assistant")
-    query = st.text_area("Enter your query:", height=100)
-    if st.button("Analyze"):
-        with st.spinner("Processing..."):
-            try:
-                workflow = AgentWorkflow(chroma)
-                results = workflow.app.invoke({"messages": [HumanMessage(content=query)]})
-                with st.expander("Processing Details", expanded=True):
-                    st.write("### Raw Results", results)
-                st.success("### Final Answer")
-                st.markdown(results['messages'][-1].content)
-            except Exception as e:
-                st.error(f"Processing failed: {str(e)}")
 # ------------------------------
-# Initialization
 # ------------------------------
-if __name__ == "__main__":
     st.set_page_config(
-        page_title="AI Research Assistant",
         layout="wide",
         initial_sidebar_state="expanded"
     )
-    try:
-        config = AppConfig()
-        config.validate()
-        chroma = ChromaManager(config)
-        main(config, chroma)
-    except Exception as e:
-        st.error(f"Initialization failed: {str(e)}")

 # ------------------------------
+# Imports & Dependencies
 # ------------------------------
 from langchain_openai import OpenAIEmbeddings
 from langchain_community.vectorstores import Chroma
 from langgraph.prebuilt import ToolNode
 from langgraph.graph.message import add_messages
 from typing_extensions import TypedDict, Annotated
+from typing import Sequence
 import chromadb
 import re
 import os
 import streamlit as st
 import requests
 from langchain.tools.retriever import create_retriever_tool
 # ------------------------------
+# Configuration
+# ------------------------------
+# Get DeepSeek API key from Hugging Face Space secrets
+DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
+if not DEEPSEEK_API_KEY:
+    st.error("""
+    **Missing API Configuration**
+    Please configure your DeepSeek API key in Hugging Face Space secrets:
+    1. Go to your Space's Settings
+    2. Click on 'Repository secrets'
+    3. Add a secret named DEEPSEEK_API_KEY
+    """)
+    st.stop()
+# Create directory for Chroma persistence
+os.makedirs("chroma_db", exist_ok=True)
+# ------------------------------
+# ChromaDB Client Configuration
+# ------------------------------
+chroma_client = chromadb.PersistentClient(path="chroma_db")
+# ------------------------------
+# Dummy Data: Research & Development Texts
 # ------------------------------
 research_texts = [
     "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%",
     "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing",
+    "Latest Trends in Machine Learning Methods Using Quantum Computing"
 ]
 development_texts = [
     "Project A: UI Design Completed, API Integration in Progress",
     "Project B: Testing New Feature X, Bug Fixes Needed",
+    "Product Y: In the Performance Optimization Stage Before Release"
 ]
 # ------------------------------
+# Text Splitting & Document Creation
 # ------------------------------
+splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=10)
+research_docs = splitter.create_documents(research_texts)
+development_docs = splitter.create_documents(development_texts)
 # ------------------------------
+# Creating Vector Stores with Embeddings
 # ------------------------------
+embeddings = OpenAIEmbeddings(
+    model="text-embedding-3-large",
+    # dimensions=1024  # Uncomment if needed
+)
+research_vectorstore = Chroma.from_documents(
+    documents=research_docs,
+    embedding=embeddings,
+    client=chroma_client,
+    collection_name="research_collection"
+)
+development_vectorstore = Chroma.from_documents(
+    documents=development_docs,
+    embedding=embeddings,
+    client=chroma_client,
+    collection_name="development_collection"
+)
+research_retriever = research_vectorstore.as_retriever()
+development_retriever = development_vectorstore.as_retriever()
 # ------------------------------
+# Creating Retriever Tools
 # ------------------------------
+research_tool = create_retriever_tool(
+    research_retriever,
+    "research_db_tool",
+    "Search information from the research database."
+)
+development_tool = create_retriever_tool(
+    development_retriever,
+    "development_db_tool",
+    "Search information from the development database."
+)
+tools = [research_tool, development_tool]
 # ------------------------------
+# Agent Function & Workflow Functions
 # ------------------------------
 class AgentState(TypedDict):
     messages: Annotated[Sequence[AIMessage | HumanMessage | ToolMessage], add_messages]
+def agent(state: AgentState):
+    """Classify the first message with DeepSeek and, if asked, query a retriever.
+
+    The first message in ``state["messages"]`` is sent to the DeepSeek chat
+    completions endpoint with a prompt asking for ``SEARCH_RESEARCH: <terms>``,
+    ``SEARCH_DEV: <terms>`` or a direct answer.
+
+    Args:
+        state: Workflow state; ``state["messages"][0]`` is either a
+            ``(role, text)`` tuple or an object with a ``content`` attribute.
+
+    Returns:
+        ``{"messages": [AIMessage]}`` whose content is one of:
+        an ``Action: <tool>`` header followed by ``Results: <documents>``,
+        the model's direct answer, or an ``API Error: ...`` text. Failures are
+        returned as a message instead of being raised.
+    """
+    print("---CALL AGENT---")
+    messages = state["messages"]
+    if isinstance(messages[0], tuple):
+        user_message = messages[0][1]
+    else:
+        user_message = messages[0].content
+    prompt = f"""Given this user question: "{user_message}"
+If it's about research or academic topics, respond EXACTLY in this format:
+SEARCH_RESEARCH: <search terms>
+If it's about development status, respond EXACTLY in this format:
+SEARCH_DEV: <search terms>
+Otherwise, just answer directly.
+"""
+    headers = {
+        "Accept": "application/json",
+        "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
+        "Content-Type": "application/json"
+    }
+    data = {
+        "model": "deepseek-chat",
+        "messages": [{"role": "user", "content": prompt}],
+        "temperature": 0.7,
+        "max_tokens": 1024
+    }
+    try:
+        # TLS certificate verification stays at the requests default (enabled):
+        # the bearer API key travels on this connection.
+        response = requests.post(
+            "https://api.deepseek.com/v1/chat/completions",
+            headers=headers,
+            json=data,
+            timeout=30
         )
+        response.raise_for_status()
+        response_text = response.json()['choices'][0]['message']['content']
+        print("Raw response:", response_text)
+        if "SEARCH_RESEARCH:" in response_text:
+            query = response_text.split("SEARCH_RESEARCH:")[1].strip()
+            results = research_retriever.invoke(query)
+            return {"messages": [AIMessage(content=f'Action: research_db_tool\n{{"query": "{query}"}}\n\nResults: {str(results)}')]}
+        elif "SEARCH_DEV:" in response_text:
+            query = response_text.split("SEARCH_DEV:")[1].strip()
+            results = development_retriever.invoke(query)
+            return {"messages": [AIMessage(content=f'Action: development_db_tool\n{{"query": "{query}"}}\n\nResults: {str(results)}')]}
+        else:
+            return {"messages": [AIMessage(content=response_text)]}
+    except Exception as e:
+        # Deliberately broad: any failure is reported as a chat message so the
+        # graph can finish instead of raising.
+        error_msg = f"API Error: {str(e)}"
+        # raise_for_status() errors carry the status line rather than the
+        # response body, so the text check alone rarely fires. DeepSeek's API
+        # docs list HTTP 402 as "Insufficient Balance"; check that as well.
+        status_code = getattr(getattr(e, "response", None), "status_code", None)
+        if "Insufficient Balance" in str(e) or status_code == 402:
+            error_msg += "\n\nPlease check your DeepSeek API account balance."
+        return {"messages": [AIMessage(content=error_msg)]}
+def simple_grade_documents(state: AgentState):
+    """Choose the next node from whether the latest message embeds documents.
+
+    Returns:
+        ``"generate"`` when the last message's content contains the literal
+        text ``Results: [Document``; otherwise ``"rewrite"``.
+    """
+    latest = state["messages"][-1]
+    print("Evaluating message:", latest.content)
+    has_documents = "Results: [Document" in latest.content
+    if not has_documents:
+        print("---NO DOCS FOUND, TRY REWRITE---")
+        return "rewrite"
+    print("---DOCS FOUND, GO TO GENERATE---")
+    return "generate"
+def generate(state: AgentState):
+    """Ask DeepSeek to summarize the documents embedded in the latest message.
+
+    The question is taken from the first message and the documents from the
+    ``Results: [`` section of the last message (empty if that marker is absent).
+
+    Args:
+        state: Workflow state; ``state["messages"][0]`` is either a
+            ``(role, text)`` tuple or an object with a ``content`` attribute.
+
+    Returns:
+        ``{"messages": [AIMessage]}`` holding the model's answer, or a
+        ``Generation Error: ...`` text when the request fails.
+    """
+    print("---GENERATE FINAL ANSWER---")
+    messages = state["messages"]
+    # A (role, text) tuple has no .content attribute; read its text element,
+    # matching how agent() handles the first message.
+    first_message = messages[0]
+    question = first_message[1] if isinstance(first_message, tuple) else first_message.content
+    last_message = messages[-1]
+    docs = ""
+    if "Results: [" in last_message.content:
+        results_start = last_message.content.find("Results: [")
+        docs = last_message.content[results_start:]
+    print("Documents found:", docs)
+    headers = {
+        "Accept": "application/json",
+        "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
+        "Content-Type": "application/json"
+    }
+    prompt = f"""Based on these research documents, summarize the latest advancements in AI:
+Question: {question}
+Documents: {docs}
+Focus on extracting and synthesizing the key findings from the research papers.
+"""
+    data = {
+        "model": "deepseek-chat",
+        "messages": [{
+            "role": "user",
+            "content": prompt
+        }],
+        "temperature": 0.7,
+        "max_tokens": 1024
+    }
+    try:
+        print("Sending generate request to API...")
+        # TLS certificate verification stays at the requests default (enabled):
+        # the bearer API key travels on this connection.
         response = requests.post(
             "https://api.deepseek.com/v1/chat/completions",
+            headers=headers,
+            json=data,
+            timeout=30
+        )
+        response.raise_for_status()
+        response_text = response.json()['choices'][0]['message']['content']
+        print("Final Answer:", response_text)
+        return {"messages": [AIMessage(content=response_text)]}
+    except Exception as e:
+        # Deliberately broad: failures are surfaced as a chat message.
+        error_msg = f"Generation Error: {str(e)}"
+        return {"messages": [AIMessage(content=error_msg)]}
+def rewrite(state: AgentState):
+    """Ask DeepSeek to rephrase the first message as a clearer question.
+
+    Args:
+        state: Workflow state; the ``content`` of ``state["messages"][0]`` is
+            used as the original question (``"N/A"`` when there are no messages).
+
+    Returns:
+        ``{"messages": [AIMessage]}`` holding the rewritten question, or a
+        ``Rewrite Error: ...`` text when the request fails.
+    """
+    print("---REWRITE QUESTION---")
+    messages = state["messages"]
+    original_question = messages[0].content if len(messages) > 0 else "N/A"
+    headers = {
+        "Accept": "application/json",
+        "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
+        "Content-Type": "application/json"
+    }
+    data = {
+        "model": "deepseek-chat",
+        "messages": [{
+            "role": "user",
+            "content": f"Rewrite this question to be more specific and clearer: {original_question}"
+        }],
+        "temperature": 0.7,
+        "max_tokens": 1024
+    }
+    try:
+        print("Sending rewrite request...")
+        # TLS certificate verification stays at the requests default (enabled):
+        # the bearer API key travels on this connection.
         response = requests.post(
             "https://api.deepseek.com/v1/chat/completions",
+            headers=headers,
+            json=data,
+            timeout=30
+        )
+        response.raise_for_status()
+        response_text = response.json()['choices'][0]['message']['content']
+        print("Rewritten question:", response_text)
+        return {"messages": [AIMessage(content=response_text)]}
+    except Exception as e:
+        # Deliberately broad: failures are surfaced as a chat message.
+        error_msg = f"Rewrite Error: {str(e)}"
+        return {"messages": [AIMessage(content=error_msg)]}
+tools_pattern = re.compile(r"Action: .*")
+def custom_tools_condition(state: AgentState):
+    """Route on whether the latest message begins with an ``Action: `` line.
+
+    Returns:
+        ``"tools"`` when ``tools_pattern`` matches at the start of the last
+        message's content; otherwise ``END``.
+    """
+    content = state["messages"][-1].content
+    print("Checking tools condition:", content)
+    if not tools_pattern.match(content):
+        print("Moving to END...")
+        return END
+    print("Moving to retrieve...")
+    return "tools"
 # ------------------------------
+# Workflow Configuration using LangGraph
 # ------------------------------
+workflow = StateGraph(AgentState)
+# Add nodes
+workflow.add_node("agent", agent)
+retrieve_node = ToolNode(tools)
+workflow.add_node("retrieve", retrieve_node)
+workflow.add_node("rewrite", rewrite)
+workflow.add_node("generate", generate)
+# Set entry point
+workflow.set_entry_point("agent")
+# Define transitions
+workflow.add_conditional_edges(
+    "agent",
+    custom_tools_condition,
+    {
+        "tools": "retrieve",
+        END: END
+    }
+)
+workflow.add_conditional_edges(
+    "retrieve",
+    simple_grade_documents,
+    {
+        "generate": "generate",
+        "rewrite": "rewrite"
+    }
+)
+workflow.add_edge("generate", END)
+workflow.add_edge("rewrite", "agent")
+# Compile the workflow
+app = workflow.compile()
 # ------------------------------
+# Processing Function
 # ------------------------------
+def process_question(user_question, app, config):
+    """Run one question through the workflow and collect every streamed event.
+
+    Args:
+        user_question: Text wrapped as a ``("user", text)`` message.
+        app: Object exposing ``stream(inputs, config)``.
+        config: Passed through unchanged as the second argument to ``stream``.
+
+    Returns:
+        A list of the events in the order ``app.stream`` yielded them.
+    """
+    initial_state = {"messages": [("user", user_question)]}
+    return list(app.stream(initial_state, config))
+# ------------------------------
+# Streamlit App UI (Dark Theme)
+# ------------------------------
+def main():
+    """Render the Streamlit page and run the workflow when the button is clicked.
+
+    Sets the page config, injects CSS, lists the research and development
+    texts in the sidebar, and on "Get Answer" passes the query to
+    process_question() and displays the "agent" and "generate" events.
+    """
     st.set_page_config(
+        page_title="AI Research & Development Assistant",
         layout="wide",
         initial_sidebar_state="expanded"
     )
+    # Inject custom CSS (dark background, styled buttons and data boxes).
+    st.markdown("""
+    <style>
+    .stApp {
+        background-color: #1a1a1a;
+        color: #ffffff;
+    }
+    .stTextArea textarea {
+        background-color: #2d2d2d !important;
+        color: #ffffff !important;
+    }
+    .stButton > button {
+        background-color: #4CAF50;
+        color: white;
+        transition: all 0.3s;
+    }
+    .stButton > button:hover {
+        background-color: #45a049;
+        transform: scale(1.02);
+    }
+    .data-box {
+        background-color: #2d2d2d;
+        border-left: 5px solid #2196F3;
+    }
+    .dev-box {
+        border-left: 5px solid #4CAF50;
+    }
+    .st-expander {
+        background-color: #2d2d2d;
+        border: 1px solid #3d3d3d;
+    }
+    </style>
+    """, unsafe_allow_html=True)
+    # Sidebar: show the raw texts that the two vector stores were built from.
+    with st.sidebar:
+        st.header("📚 Available Data")
+        st.subheader("Research Database")
+        for text in research_texts:
+            st.markdown(f'<div class="data-box research-box" style="padding: 15px; margin: 10px 0; border-radius: 5px;">{text}</div>', unsafe_allow_html=True)
+        st.subheader("Development Database")
+        for text in development_texts:
+            st.markdown(f'<div class="data-box dev-box" style="padding: 15px; margin: 10px 0; border-radius: 5px;">{text}</div>', unsafe_allow_html=True)
+    st.title("🤖 AI Research & Development Assistant")
+    st.markdown("---")
+    query = st.text_area("Enter your question:", height=100, placeholder="e.g., What is the latest advancement in AI research?")
+    col1, col2 = st.columns([1, 2])
+    with col1:
+        if st.button("🔍 Get Answer", use_container_width=True):
+            if query:
+                try:
+                    with st.spinner('Processing your question...'):
+                        events = process_question(query, app, {"configurable": {"thread_id": "1"}})
+                        # Only events carrying an 'agent' or 'generate' key are
+                        # rendered (presumably the graph node names); any other
+                        # event is skipped.
+                        for event in events:
+                            if 'agent' in event:
+                                with st.expander("🔄 Processing Step", expanded=True):
+                                    content = event['agent']['messages'][0].content
+                                    if "Error" in content:
+                                        st.error(content)
+                                    elif "Results:" in content:
+                                        st.markdown("### 📑 Retrieved Documents:")
+                                        docs_start = content.find("Results:")
+                                        docs = content[docs_start:]
+                                        st.info(docs)
+                            elif 'generate' in event:
+                                content = event['generate']['messages'][0].content
+                                if "Error" in content:
+                                    st.error(content)
+                                else:
+                                    st.markdown("### ✨ Final Answer:")
+                                    st.success(content)
+                except Exception as e:
+                    st.error(f"""
+                    **Processing Error**
+                    {str(e)}
+                    Please check:
+                    - API key configuration
+                    - Account balance
+                    - Network connection
+                    """)
+            else:
+                st.warning("⚠️ Please enter a question first!")
+    with col2:
+        st.markdown("""
+        ### 🎯 How to Use
+        1. Enter your question in the text box
+        2. Click the search button
+        3. Review processing steps
+        4. See final answer
+        ### 💡 Example Questions
+        - What's new in AI image recognition?
+        - How is Project B progressing?
+        - Recent machine learning trends?
+        """)
+if __name__ == "__main__":
+    main()