"""Gradio chat UI answering questions about the Apple Music API docs.

Loads a persisted Chroma vectorstore, wires it into a LangChain
ConversationalRetrievalChain with conversation memory, and serves the
result through a gr.ChatInterface.
"""

import os

import gradio as gr
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain_community.vectorstores import Chroma
from langchain_openai import ChatOpenAI, OpenAIEmbeddings


def create_qa_chain():
    """Build the conversational retrieval QA chain.

    Returns:
        ConversationalRetrievalChain backed by the persisted Chroma
        vectorstore in ./vectorstore, with buffer memory and a prompt
        that forces a structured API-documentation answer format.

    Note: requires OPENAI_API_KEY in the environment (used implicitly by
    OpenAIEmbeddings / ChatOpenAI) and an already-built ./vectorstore.
    """
    # Embeddings must match those used when the vectorstore was built.
    embeddings = OpenAIEmbeddings()
    vectorstore = Chroma(
        persist_directory="./vectorstore",
        embedding_function=embeddings,
    )

    # MMR retrieval: fetch 20 candidates, return 6 diversified results.
    # lambda_mult=0.3 leans toward diversity over pure relevance.
    retriever = vectorstore.as_retriever(
        search_type="mmr",
        search_kwargs={
            "k": 6,
            "fetch_k": 20,
            "lambda_mult": 0.3,
        },
    )

    # output_key='answer' is required because the chain also returns
    # source_documents; memory needs to know which key to store.
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
        output_key="answer",
    )

    qa_prompt = PromptTemplate.from_template(
        """You are an expert technical writer specializing in API documentation.

When describing API endpoints, structure your response in this exact format:

1. Start with the HTTP method and base URI structure
2. List all key parameters with:
   - Parameter name in bold (**parameter**)
   - Type and requirement status
   - Clear description
   - Example values where applicable
3. Show complete example requests with:
   - Basic example
   - Full example with all parameters
   - Headers included
4. Include any relevant response information

Use markdown formatting for:
- Code blocks with syntax highlighting
- Bold text for important terms
- Clear section separation

Context: {context}

Question: {question}

Technical answer (following the exact structure above):"""
    )

    qa_chain = ConversationalRetrievalChain.from_llm(
        llm=ChatOpenAI(
            temperature=0.1,  # near-deterministic: factual doc answers
            model_name="gpt-4-turbo-preview",
        ),
        retriever=retriever,
        memory=memory,
        return_source_documents=True,
        combine_docs_chain_kwargs={"prompt": qa_prompt},
        verbose=False,
    )
    return qa_chain


def _format_sources(source_documents, max_sources=3):
    """Render retrieved documents as a deduplicated "Sources" footer.

    Args:
        source_documents: documents with .metadata and .page_content
            (LangChain Document objects).
        max_sources: maximum number of distinct (component, title)
            sources to show.

    Returns:
        A markdown string beginning with "\\n\\nSources:\\n", or ""
        when there is nothing to show. (Fix: the original emitted a
        dangling "Sources:" header even with zero documents.)
    """
    seen = set()
    parts = []
    shown = 0
    for doc in source_documents:
        component = doc.metadata.get('component', '')
        title = doc.metadata.get('title', '')
        combo = (component, title)
        # Skip duplicates; stop adding once the cap is reached. Matching
        # the original, docs beyond the cap are not recorded in `seen`.
        if combo in seen or shown >= max_sources:
            continue
        seen.add(combo)
        shown += 1
        parts.append(f"\nSource {shown}:\n")
        parts.append(f"Title: {title}\n")
        parts.append(f"Component: {component}\n")
        # Truncate long chunks to keep the footer readable.
        parts.append(f"Content: {doc.page_content[:300]}...\n")
    if not parts:
        return ""
    return "\n\nSources:\n" + "".join(parts)


def chat(message, history):
    """Gradio ChatInterface callback: answer `message` with sources.

    Args:
        message: the user's question.
        history: chat history supplied by Gradio; unused — the chain
            keeps its own ConversationBufferMemory.

    Returns:
        The chain's answer followed by a formatted sources footer
        (footer omitted when no sources were retrieved).
    """
    # Lazily build the chain once and cache it on the function object
    # so the vectorstore is only loaded on the first message.
    if not hasattr(chat, 'qa_chain'):
        chat.qa_chain = create_qa_chain()

    # .invoke() replaces the deprecated Chain.__call__ protocol.
    result = chat.qa_chain.invoke({"question": message})

    return result["answer"] + _format_sources(result["source_documents"])


demo = gr.ChatInterface(
    chat,
    title="Apple Music API Documentation Assistant",
    description="Ask questions about the Apple Music API documentation.",
    examples=[
        "How to search for songs on Apple Music API?",
        "What are the required parameters for searching songs?",
        "Show me an example request with all parameters",
    ],
)

if __name__ == "__main__":
    demo.launch()