HaryaniAnjali commited on
Commit
633adde
·
verified ·
1 Parent(s): 5d4f3f5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +160 -104
app.py CHANGED
@@ -1,129 +1,184 @@
1
  import os
 
2
  import gradio as gr
3
- from langchain.chat_models import ChatOpenAI
4
- from langchain.document_loaders import WikipediaLoader
5
- from langchain.text_splitter import CharacterTextSplitter
6
- from langchain.embeddings import HuggingFaceEmbeddings
7
- from langchain.vectorstores import FAISS
8
  from langchain.chains import ConversationalRetrievalChain
 
9
  from langchain.memory import ConversationBufferMemory
10
- from langchain.embeddings import OpenAIEmbeddings
 
 
 
 
11
 
12
# Simple in-memory response cache keyed by the raw query string.
class MemoryCache:
    """Tiny dict-backed memo of previously computed responses."""

    def __init__(self):
        # Backing store; lives for the lifetime of the process.
        self.cache = {}

    def get(self, query):
        """Return the stored response for *query*, or None when absent."""
        return self.cache.get(query, None)

    def set(self, query, response):
        """Remember *response* for *query*, replacing any earlier value."""
        self.cache[query] = response
22
 
23
# Function to extract specific sections (simplified placeholder lookup).
def extract_section(query, content):
    """Return canned section text when *query* mentions a known topic, else None.

    *content* is accepted for interface compatibility but is not inspected.
    Topics are checked in a fixed order, mirroring the original if/elif chain.
    """
    placeholders = (
        ("early history",
         "Information about the early history of Generative AI would appear here."),
        ("generative models",
         "Information about generative models would appear here."),
        ("academic artificial intelligence",
         "Information about academic artificial intelligence would appear here."),
    )
    lowered = query.lower()
    for topic, text in placeholders:
        if topic in lowered:
            return text
    return None
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
# Main QA class (simplified)
class GenAIQASystem:
    """Q/A system over the Generative AI Wikipedia page.

    Lifecycle: initialize() installs the OpenAI key, load_wikipedia() builds
    the retrieval chain, process_query() answers questions (cache first, then
    canned section extraction, then the chain).
    """

    def __init__(self):
        self.cache = MemoryCache()  # query -> answer memo
        self.content = None         # page text (placeholder until Wikipedia loads)
        self.qa_chain = None        # ConversationalRetrievalChain once built
        self.initialized = False
        # Conversation memory shared with the chain so follow-ups keep context.
        self.memory = ConversationBufferMemory(
            memory_key="chat_history",
            return_messages=True
        )

    def initialize(self, api_key=None):
        """Install the API key and mark the system ready.

        Returns a (ok: bool, status_message: str) pair.
        """
        if api_key:
            os.environ["OPENAI_API_KEY"] = api_key

        # Neither an argument nor a pre-existing environment key: cannot proceed.
        if not api_key and "OPENAI_API_KEY" not in os.environ:
            return False, "OpenAI API key is not set"

        if self.initialized:
            return True, "System already initialized"

        try:
            # Initialize with placeholder content for faster startup
            self.content = "This is placeholder content for Generative AI."
            self.initialized = True
            return True, "System initialized successfully"
        except Exception as e:
            return False, f"Error initializing system: {str(e)}"

    def load_wikipedia(self):
        """Fetch the article, index it, and build the QA chain.

        Returns a human-readable status string for the UI.
        """
        if not self.initialized:
            return "System not initialized. Please set your OpenAI API key first."

        try:
            # Loading Wikipedia page for Generative AI
            loader = WikipediaLoader("Generative artificial intelligence")
            docs = loader.load()
            self.content = docs[0].page_content

            # Split content into chunks
            text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
            texts = text_splitter.split_text(self.content)

            # Create vector store
            embeddings = OpenAIEmbeddings()
            vectorstore = FAISS.from_texts(texts, embeddings)

            # Set up QA chain
            llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
            self.qa_chain = ConversationalRetrievalChain.from_llm(
                llm=llm,
                retriever=vectorstore.as_retriever(),
                memory=self.memory
            )

            return "Wikipedia content loaded successfully!"
        except Exception as e:
            return f"Error loading Wikipedia content: {str(e)}"

    def process_query(self, query):
        """Answer *query*; attempts in order: cache, section extraction, QA chain."""
        if not self.initialized:
            return "System not initialized. Please set your OpenAI API key first."

        # Check cache
        cached_answer = self.cache.get(query)
        if cached_answer:
            return f"[Cache] Answer:\n{cached_answer}"

        # Check for section extraction
        if self.content:
            extracted_section = extract_section(query, self.content)
            if extracted_section:
                self.cache.set(query, extracted_section)
                return f"[Function Calling] Section from content:\n{extracted_section}"

        # If QA chain is ready, use it
        if self.qa_chain:
            try:
                result = self.qa_chain({"question": query})
                answer = result.get("answer", "No answer found")
                self.cache.set(query, answer)
                return answer
            except Exception as e:
                return f"Error processing query: {str(e)}"
        else:
            # If Wikipedia content isn't loaded yet
            return "Please load Wikipedia content first by clicking 'Load Wikipedia' in the Settings tab."
122
 
123
  # Initialize system
124
  qa_system = GenAIQASystem()
125
 
126
- # Gradio interface
127
  with gr.Blocks(title="Generative AI Q/A System") as demo:
128
  gr.Markdown("# Generative AI Q/A System")
129
  gr.Markdown("Ask questions about Generative AI using this LangChain-based Q/A system")
@@ -132,40 +187,41 @@ with gr.Blocks(title="Generative AI Q/A System") as demo:
132
  chatbot = gr.Chatbot()
133
  msg = gr.Textbox(label="Your Question")
134
  clear = gr.Button("Clear")
135
-
136
- def respond(message, history):
137
- try:
138
- response = qa_system.process_query(message)
139
- return "", history + [(message, response)]
140
- except Exception as e:
141
- error_message = f"Error processing query: {str(e)}"
142
- return "", history + [(message, error_message)]
143
 
 
 
 
 
144
 
145
- msg.submit(respond, [msg, chatbot], [chatbot])
146
- clear.click(lambda: None, None, chatbot, queue=False)
147
 
148
  with gr.Tab("Settings"):
149
- api_key_input = gr.Textbox(type="password", label="OpenAI API Key")
150
- api_submit = gr.Button("Set API Key")
151
- api_status = gr.Textbox(label="Status")
152
- load_wiki = gr.Button("Load Wikipedia Content")
153
- wiki_status = gr.Textbox(label="Wikipedia Status")
154
-
155
- def set_api_key(api_key):
156
- success, message = qa_system.initialize(api_key)
157
- return message
 
158
 
159
- api_submit.click(set_api_key, [api_key_input], [api_status])
160
- load_wiki.click(qa_system.load_wikipedia, [], wiki_status)
161
 
162
  gr.Markdown("## About")
163
  gr.Markdown("""
164
  This Q/A system uses LangChain and OpenAI to answer questions based on the Wikipedia page about Generative AI.
165
 
166
- Created by Anjali Haryani (Modified for Hugging Face deployment)
 
 
 
 
 
167
  """)
168
 
169
- # Launch the app
170
  if __name__ == "__main__":
171
  demo.launch()
 
1
  import os
2
+ import logging
3
  import gradio as gr
 
 
 
 
 
4
  from langchain.chains import ConversationalRetrievalChain
5
+ from langchain_openai import ChatOpenAI
6
  from langchain.memory import ConversationBufferMemory
7
+ from langchain_community.vectorstores import FAISS
8
+ from langchain_openai import OpenAIEmbeddings
9
+ from langchain_community.document_loaders import WikipediaLoader
10
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
11
+ from langchain.callbacks.base import BaseCallbackHandler
12
 
13
# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Memory cache for storing answers
class MemoryCache:
    """In-memory key-value cache mapping query strings to answer strings."""

    def __init__(self):
        # Plain dict, unbounded — acceptable for a demo app's lifetime.
        self.cache = {}

    def get(self, query: str):
        """Return the cached response for *query*, or None on a miss."""
        # Single lookup via EAFP (the original did `in` + `.get`, two lookups).
        try:
            response = self.cache[query]
        except KeyError:
            return None
        # Lazy %-args: the message is only formatted if INFO is enabled.
        logger.info("Cache hit: %s", query)
        return response

    def set(self, query: str, response: str):
        """Store *response* under *query*, overwriting any prior entry."""
        logger.info("Saving to cache: %s", query)
        self.cache[query] = response
31
 
32
# Callback handler for logging
class LoggingCallbackHandler(BaseCallbackHandler):
    """LangChain callback that logs chain / retriever / LLM lifecycle events.

    All calls use lazy %-style logging arguments so no string formatting
    happens when the INFO level is disabled.
    """

    def on_chain_start(self, serialized, inputs, **kwargs):
        logger.info("Chain start. Inputs: %s", inputs)

    def on_chain_end(self, outputs, **kwargs):
        logger.info("Chain end. Outputs: %s", outputs)

    def on_retriever_start(self, *args, **kwargs):
        logger.info("Retrieval start.")

    def on_retriever_end(self, *args, **kwargs):
        logger.info("Retrieval end.")

    def on_llm_start(self, *args, **kwargs):
        logger.info("LLM start.")

    def on_llm_end(self, result, *args, **kwargs):
        # generations is indexed [prompt][candidate]; guard because the exact
        # shape varies across LLM wrappers — logging must never crash the app.
        try:
            final_text = result.generations[0][0].text
            logger.info("LLM end. Text: %s", final_text)
        except Exception as e:
            logger.error("LLM error: %s", e)
55
 
 
56
class GenAIQASystem:
    """Retrieval-augmented Q/A over the Wikipedia article on Generative AI.

    Lifecycle: set_api_key() validates and installs the OpenAI key,
    load_wikipedia() fetches/indexes the article and builds the chain,
    process_query() answers questions (cache, then section extraction,
    then the retrieval chain).
    """

    def __init__(self):
        self.cache = MemoryCache()                    # query -> answer memo
        self.callback_handler = LoggingCallbackHandler()
        self.content = None                           # raw page text once loaded
        self.qa_chain = None                          # ConversationalRetrievalChain once built
        self.memory = None                            # conversation memory for the chain
        self.wiki_loaded = False
        self.api_key_set = False

    def set_api_key(self, api_key):
        """Validate and install the OpenAI API key; return a status string."""
        if not api_key:
            return "Please provide a valid API key."

        try:
            os.environ["OPENAI_API_KEY"] = api_key
            # Round-trip a tiny embedding request to confirm the key works.
            embeddings = OpenAIEmbeddings()
            embeddings.embed_query("Test")
            self.api_key_set = True
            return "API key set successfully!"
        except Exception as e:
            logger.error("API key error: %s", e)
            return f"Error setting API key: {str(e)}"

    def load_wikipedia(self):
        """Fetch the article, build the FAISS index and the QA chain.

        Returns a human-readable status string for the UI. Idempotent:
        repeated calls after success are no-ops.
        """
        if not self.api_key_set:
            return "Please set your OpenAI API key first."

        if self.wiki_loaded:
            return "Wikipedia content already loaded."

        try:
            logger.info("Loading Wikipedia content for Generative artificial intelligence")

            # Load Wikipedia content
            loader = WikipediaLoader(query="Generative artificial intelligence", lang="en")
            documents = loader.load()
            self.content = documents[0].page_content

            # Split content into overlapping chunks for retrieval
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
            chunks = text_splitter.split_text(self.content)

            # Create vector store
            embeddings = OpenAIEmbeddings()
            vectorstore = FAISS.from_texts(chunks, embeddings)

            # Conversation memory so follow-up questions keep context
            self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

            # Create QA Chain
            llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
            self.qa_chain = ConversationalRetrievalChain.from_llm(
                llm,
                retriever=vectorstore.as_retriever(),
                memory=self.memory,
                callbacks=[self.callback_handler]
            )

            self.wiki_loaded = True
            return "Wikipedia content loaded successfully!"
        except Exception as e:
            logger.error("Error loading Wikipedia: %s", e)
            return f"Error loading Wikipedia: {str(e)}"

    def extract_section(self, query: str):
        """Return the raw text of a known section if *query* names one, else None."""
        if not self.content:
            return None

        query_lower = query.lower()
        content_lower = self.content.lower()

        # Dictionary of section headers to look for
        sections = {
            "early history": "== early history ==",
            "generative models": "== generative models ==",
            "academic artificial intelligence": "== academic artificial intelligence =="
        }

        # Check if query matches any section
        for key, header in sections.items():
            if key in query_lower:
                # NOTE(review): indices found in the lowercased copy are reused on
                # the original text; this assumes lowercasing preserves length
                # (true for the ASCII article text — confirm if content changes).
                start_index = content_lower.find(header)
                if start_index != -1:
                    logger.info("Found header: %s", header)
                    # Section ends at the next "== ... ==" heading, or end of text.
                    end_index = self.content.find("\n==", start_index + len(header))
                    section_text = self.content[start_index:end_index].strip() if end_index != -1 else self.content[start_index:].strip()
                    return section_text

        return None

    def process_query(self, query):
        """Answer *query*; attempts in order: cache, section extraction, QA chain."""
        if not self.api_key_set:
            return "Please set your OpenAI API key in the Settings tab first."

        if not self.wiki_loaded:
            return "Please load Wikipedia content in the Settings tab first."

        # Check cache first ('is not None' so an empty cached answer still counts
        # as a hit — a plain truthiness test would silently recompute it).
        cached_answer = self.cache.get(query)
        if cached_answer is not None:
            return cached_answer

        # Try to extract a specific section
        extracted_section = self.extract_section(query)
        if extracted_section:
            self.cache.set(query, extracted_section)
            return f"[Section Found] {extracted_section}"

        # Use the QA chain
        try:
            logger.info("Processing query: %s", query)
            result = self.qa_chain.invoke({"question": query})
            answer = result.get("answer", "No answer found")
            self.cache.set(query, answer)
            return answer
        except Exception as e:
            logger.error("Error in QA chain: %s", e)
            return f"Error processing query: {str(e)}"
 
 
 
177
 
178
# Initialize system
# Single module-level QA system; the Gradio handlers below close over it.
qa_system = GenAIQASystem()
180
 
181
+ # Define Gradio interface
182
  with gr.Blocks(title="Generative AI Q/A System") as demo:
183
  gr.Markdown("# Generative AI Q/A System")
184
  gr.Markdown("Ask questions about Generative AI using this LangChain-based Q/A system")
 
187
        chatbot = gr.Chatbot()
        msg = gr.Textbox(label="Your Question")
        clear = gr.Button("Clear")

        def respond(message, history):
            """Chat submit handler: answer *message* and append the exchange."""
            response = qa_system.process_query(message)
            history.append((message, response))
            # First output clears the textbox, second refreshes the chat display.
            return "", history

        # Enter in the textbox sends the question; outputs map to (msg, chatbot).
        msg.submit(respond, [msg, chatbot], [msg, chatbot])
        # Clear resets the chatbot history to an empty list.
        clear.click(lambda: [], None, chatbot, queue=False)
198
 
199
    with gr.Tab("Settings"):
        with gr.Group():
            # Step 1: the key must be validated before anything else can run.
            gr.Markdown("### Step 1: Set OpenAI API Key")
            api_key_input = gr.Textbox(type="password", label="OpenAI API Key")
            api_submit = gr.Button("Set API Key")
            api_status = gr.Textbox(label="API Status", interactive=False)

        with gr.Group():
            # Step 2: fetch and index the article (requires the key from step 1).
            gr.Markdown("### Step 2: Load Wikipedia Content")
            load_wiki_button = gr.Button("Load Wikipedia Content")
            wiki_status = gr.Textbox(label="Loading Status", interactive=False)

        # Wire the buttons directly to the QA-system methods; both return
        # status strings shown in the corresponding textboxes.
        api_submit.click(qa_system.set_api_key, [api_key_input], [api_status])
        load_wiki_button.click(qa_system.load_wikipedia, [], [wiki_status])

    gr.Markdown("## About")
    gr.Markdown("""
    This Q/A system uses LangChain and OpenAI to answer questions based on the Wikipedia page about Generative AI.

    Features:
    - Caching mechanism to avoid repeating work
    - Function calls to extract specific sections
    - Logging to track processing

    Created by Anjali Haryani
    """)

# Launch the Gradio app only when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()