add streaming and memory management
app.py CHANGED
````diff
@@ -1,3 +1,4 @@
+```python
 import gradio as gr
 import os
 from langchain.chains import ConversationalRetrievalChain
@@ -5,19 +6,26 @@ from langchain.memory import ConversationBufferMemory
 from langchain_openai import ChatOpenAI, OpenAIEmbeddings
 from langchain.prompts import PromptTemplate
 from langchain_community.vectorstores import Chroma
+from queue import Queue
+from threading import Thread
 
-def create_qa_chain():
+class StreamHandler:
+    def __init__(self, queue):
+        self.queue = queue
+
+    def on_llm_new_token(self, token):
+        self.queue.put(token)
+
+def create_qa_chain(streaming_handler=None):
     """
-    Create the QA chain with
+    Create the QA chain with streaming capability
     """
-    # Initialize embeddings and load vectorstore
     embeddings = OpenAIEmbeddings()
     vectorstore = Chroma(
         persist_directory="./vectorstore",
         embedding_function=embeddings
     )
 
-    # Set up retriever
     retriever = vectorstore.as_retriever(
         search_type="mmr",
         search_kwargs={
@@ -27,14 +35,12 @@ def create_qa_chain():
         }
     )
 
-    # Set up memory
     memory = ConversationBufferMemory(
         memory_key="chat_history",
         return_messages=True,
         output_key='answer'
     )
 
-    # Create prompt template
     qa_prompt = PromptTemplate.from_template("""You are an expert technical writer specializing in API documentation.
 When describing API endpoints, structure your response in this exact format:
 
@@ -61,11 +67,12 @@ Question: {question}
 
 Technical answer (following the exact structure above):""")
 
-    # Create the chain
     qa_chain = ConversationalRetrievalChain.from_llm(
         llm=ChatOpenAI(
             temperature=0.1,
-            model_name="gpt-4-turbo-preview"
+            model_name="gpt-4-turbo-preview",
+            streaming=True,
+            callbacks=[streaming_handler] if streaming_handler else None
         ),
         retriever=retriever,
         memory=memory,
@@ -76,50 +83,67 @@ Technical answer (following the exact structure above):""")
 
     return qa_chain
 
-def chat(message, history):
+def predict(message, history):
     """
-    Process
+    Process each message with streaming
     """
-    chat.qa_chain = create_qa_chain()
-    # Get response
-    result = chat.qa_chain({"question": message})
-    seen_components = set()
-    shown_sources = 0
+    token_queue = Queue()
+    stream_handler = StreamHandler(token_queue)
 
+    # Create new QA chain for each conversation to ensure fresh memory
+    qa_chain = create_qa_chain(stream_handler)
 
+    # Function to process the message and add to queue
+    def get_response():
+        result = qa_chain({"question": message})
+        # Add sources to queue
+        sources = "\n\nSources:\n"
+        seen_components = set()
+        shown_sources = 0
+
+        for doc in result["source_documents"]:
+            component = doc.metadata.get('component', '')
+            title = doc.metadata.get('title', '')
+            combo = (component, title)
+
+            if combo not in seen_components and shown_sources < 3:
+                seen_components.add(combo)
+                shown_sources += 1
+                sources += f"\nSource {shown_sources}:\n"
+                sources += f"Title: {title}\n"
+                sources += f"Component: {component}\n"
+                sources += f"Content: {doc.page_content[:300]}...\n"
+
+        for char in sources:
+            token_queue.put(char)
+        token_queue.put(None)  # Signal end of response
 
+    # Start processing in a separate thread
+    thread = Thread(target=get_response)
+    thread.start()
 
+    # Stream the response
+    response = ""
+    while True:
+        token = token_queue.get()
+        if token is None:
+            break
+        response += token
+        yield response
 
+# Create the Gradio interface
+with gr.Blocks() as demo:
+    chatbot = gr.ChatInterface(
+        predict,
+        title="Apple Music API Documentation Assistant",
+        description="Ask questions about the Apple Music API documentation.",
+        examples=[
+            "How to search for songs on Apple Music API?",
+            "What are the required parameters for searching songs?",
+            "Show me an example request with all parameters"
+        ]
+    )
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.queue().launch()
+```
````
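As committed, `StreamHandler` is a plain class handed to `ChatOpenAI(callbacks=[...])`. LangChain dispatches streaming tokens to callback objects that derive from its callback base class, so a handler along the following lines is the usual shape for this pattern. This is a minimal sketch under that assumption, not the code in the commit; `QueueStreamHandler` is an illustrative name, and the import path shown is the one exposed by the `langchain` package the app already depends on.

```python
# Sketch: a queue-backed token handler built on LangChain's callback base class.
# Assumes the same queue-plus-None-sentinel protocol that predict() uses.
from queue import Queue

from langchain.callbacks.base import BaseCallbackHandler


class QueueStreamHandler(BaseCallbackHandler):
    """Push each generated token onto a queue as the LLM streams it."""

    def __init__(self, queue: Queue):
        self.queue = queue

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        # Invoked once per streamed token when the chat model runs with streaming=True.
        self.queue.put(token)
```

With a handler like this, the rest of the flow in `predict` (building the chain with the handler, draining the queue, and treating `None` as the end-of-response marker) stays unchanged.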
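On the Gradio side, the change leans on two behaviors: `gr.ChatInterface` streams a reply when its handler is a generator, re-rendering the bot message with each yielded string, and `demo.queue()` configures the request queue that those incremental updates flow through. A self-contained sketch of that pattern, independent of LangChain and with an illustrative handler name:

```python
# Standalone sketch of generator-based streaming in Gradio; echo_slowly and the
# title are illustrative only, not part of the commit.
import time

import gradio as gr


def echo_slowly(message, history):
    partial = ""
    for ch in f"You said: {message}":
        partial += ch
        time.sleep(0.01)
        yield partial  # each yield replaces the displayed bot reply


demo = gr.ChatInterface(echo_slowly, title="Streaming demo")

if __name__ == "__main__":
    demo.queue().launch()
```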