Spaces:

ahmadsanafarooq
/

RagLearningAssistant

Sleeping

App Files Community

ahmadsanafarooq committed on Jul 25, 2025

Commit

db840b8

verified ·

1 Parent(s): eccecc3

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -47

app.py CHANGED Viewed

@@ -1,23 +1,23 @@
 import os
 import gradio as gr
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_community.vectorstores import Chroma
-from langchain.chains import RetrievalQA
-from langchain_groq import ChatGroq
-from langchain_community.document_loaders import TextLoader, PyPDFLoader
-from langchain.schema import Document
 from pathlib import Path
 from typing import List
 import logging
 import numpy as np
 from sklearn.feature_extraction.text import TfidfVectorizer
-from dotenv import load_dotenv
-# Logger Configuration
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
-# Simple TF-IDF Fallback Embeddings
 class SimpleEmbeddings:
     def __init__(self):
         self.vectorizer = TfidfVectorizer(max_features=384, stop_words='english')
@@ -34,7 +34,7 @@ class SimpleEmbeddings:
             return [0.0] * 384
         return self.vectorizer.transform([text]).toarray()[0].tolist()
-# RAG Assistant Class
 class RAGAssistant:
     def __init__(self, groq_api_key: str):
         self.groq_api_key = groq_api_key
@@ -58,7 +58,7 @@ class RAGAssistant:
                         model_kwargs={'device': 'cpu'},
                         encode_kwargs={'normalize_embeddings': False}
                     )
-                    print(f"Loaded HuggingFace model: {model_name}")
                     return embeddings
                 except Exception as e:
                     print(f"Failed to load {model_name}: {e}")
@@ -79,7 +79,7 @@ class RAGAssistant:
                 collection_name="code_documentation"
             )
         except Exception as e:
-            logger.error(f"Vector store init error: {str(e)}")
     def load_documents(self, files: List[str], assistant_type: str) -> str:
         try:
@@ -88,27 +88,22 @@ class RAGAssistant:
             for file_path in files:
                 print(f"Trying to load: {file_path}")
-                print("File exists?", os.path.exists(file_path))
                 try:
                     if file_path.lower().endswith('.pdf'):
-                        loader = PyPDFLoader(file_path)
                     else:
                         loader = TextLoader(file_path, encoding='utf-8')
                     docs = loader.load()
-                    print(f"Loaded {len(docs)} docs from: {file_path}")
-                    for doc in docs[:1]:
-                        print("Preview:", doc.page_content[:100])
                     documents.extend(docs)
                 except Exception as e:
-                    logger.error(f"Error loading {file_path}: {e}")
                     continue
             if not documents:
                 return "❌ No documents could be loaded. Please check your file type or content."
             chunks = self.text_splitter.split_documents(documents)
-            print(f"Total chunks created: {len(chunks)}")
             for chunk in chunks:
                 chunk.metadata['assistant_type'] = assistant_type
@@ -124,12 +119,12 @@ class RAGAssistant:
         except Exception as e:
             logger.error(f"Error loading documents: {str(e)}")
-            return f"❌ Error loading documents: {str(e)}"
     def get_learning_tutor_response(self, question: str) -> str:
         try:
             if not self.learning_vectorstore:
-                return "⚠️ Please upload some learning materials first."
             qa_chain = RetrievalQA.from_chain_type(
                 llm=self.llm,
@@ -138,7 +133,10 @@ class RAGAssistant:
                 return_source_documents=True
             )
-            result = qa_chain({"query": question})
             response = result['result']
             if result.get('source_documents'):
@@ -148,14 +146,15 @@ class RAGAssistant:
                     response += f"- {Path(source).name}\n"
             return response
         except Exception as e:
-            logger.error(f"Error in learning tutor: {str(e)}")
             return f"❌ Error: {str(e)}"
     def get_code_helper_response(self, question: str) -> str:
         try:
             if not self.code_vectorstore:
-                return "⚠️ Please upload some code documentation first."
             qa_chain = RetrievalQA.from_chain_type(
                 llm=self.llm,
@@ -164,7 +163,10 @@ class RAGAssistant:
                 return_source_documents=True
             )
-            result = qa_chain({"query": question})
             response = result['result']
             if result.get('source_documents'):
@@ -174,22 +176,23 @@ class RAGAssistant:
                     response += f"- {Path(source).name}\n"
             return response
         except Exception as e:
-            logger.error(f"Error in code helper: {str(e)}")
             return f"❌ Error: {str(e)}"
-# Gradio UI
 def create_gradio_interface(assistant: RAGAssistant):
     def upload_learning_files(files):
         if not files:
             return "No files uploaded."
-        file_paths = [f.name for f in files]  # ✅ FIXED HERE
         return assistant.load_documents(file_paths, "learning")
     def upload_code_files(files):
         if not files:
             return "No files uploaded."
-        file_paths = [f.name for f in files]  # ✅ FIXED HERE
         return assistant.load_documents(file_paths, "code")
     def learning_chat(message, history):
@@ -207,53 +210,53 @@ def create_gradio_interface(assistant: RAGAssistant):
         return history, ""
     with gr.Blocks(title="RAG-Based Learning & Code Assistant", theme=gr.themes.Soft()) as demo:
-        gr.Markdown("# 📚 RAG-Based Learning & Code Assistant")
         with gr.Tabs():
-            with gr.TabItem("📘 Learning Tutor"):
                 with gr.Row():
                     with gr.Column(scale=1):
                         learning_files = gr.File(label="Upload Materials", file_count="multiple", file_types=[".pdf", ".txt", ".md"])
-                        learning_upload_btn = gr.Button("Upload", variant="primary")
                         learning_status = gr.Textbox(label="Upload Status", interactive=False)
                     with gr.Column(scale=2):
-                        learning_chatbot = gr.Chatbot(label="Learning Chat", height=400)
-                        learning_input = gr.Textbox(label="Ask your question", placeholder="e.g. What is overfitting?")
-                        learning_submit = gr.Button("Ask", variant="primary")
                 learning_upload_btn.click(upload_learning_files, inputs=[learning_files], outputs=[learning_status])
                 learning_submit.click(learning_chat, inputs=[learning_input, learning_chatbot], outputs=[learning_chatbot, learning_input])
                 learning_input.submit(learning_chat, inputs=[learning_input, learning_chatbot], outputs=[learning_chatbot, learning_input])
-            with gr.TabItem("💻 Code Helper"):
                 with gr.Row():
                     with gr.Column(scale=1):
-                        code_files = gr.File(label="Upload Docs", file_count="multiple", file_types=[".pdf", ".txt", ".md", ".py", ".json"])
-                        code_upload_btn = gr.Button("Upload", variant="primary")
                         code_status = gr.Textbox(label="Upload Status", interactive=False)
                     with gr.Column(scale=2):
                         code_chatbot = gr.Chatbot(label="Code Chat", height=400)
-                        code_input = gr.Textbox(label="Ask question", placeholder="e.g. How to call this API?")
-                        code_submit = gr.Button("Ask", variant="primary")
                 code_upload_btn.click(upload_code_files, inputs=[code_files], outputs=[code_status])
                 code_submit.click(code_chat, inputs=[code_input, code_chatbot], outputs=[code_chatbot, code_input])
                 code_input.submit(code_chat, inputs=[code_input, code_chatbot], outputs=[code_chatbot, code_input])
-        gr.Markdown("Built with ❤️ using LangChain, ChromaDB, and Groq")
     return demo
-# Main
 def main():
     load_dotenv()
     groq_api_key = os.getenv("GROQ_API_KEY")
     if not groq_api_key:
-        print("❌ Please set your GROQ_API_KEY in .env or environment.")
         return
     assistant = RAGAssistant(groq_api_key)
     demo = create_gradio_interface(assistant)
     demo.launch(server_name="0.0.0.0", server_port=7860, share=False, debug=True)
 if __name__ == "__main__":
-    main()

 import os
 import gradio as gr
 from pathlib import Path
 from typing import List
 import logging
+from dotenv import load_dotenv
 import numpy as np
 from sklearn.feature_extraction.text import TfidfVectorizer
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.vectorstores import Chroma
+from langchain.chains import RetrievalQA
+from langchain_groq import ChatGroq
+from langchain.schema import Document
+from langchain_community.document_loaders import TextLoader, UnstructuredPDFLoader
+# ----------------- Logger Setup -----------------
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+# ----------------- Fallback Embeddings -----------------
 class SimpleEmbeddings:
     def __init__(self):
         self.vectorizer = TfidfVectorizer(max_features=384, stop_words='english')
             return [0.0] * 384
         return self.vectorizer.transform([text]).toarray()[0].tolist()
+# ----------------- RAG Assistant Class -----------------
 class RAGAssistant:
     def __init__(self, groq_api_key: str):
         self.groq_api_key = groq_api_key
                         model_kwargs={'device': 'cpu'},
                         encode_kwargs={'normalize_embeddings': False}
                     )
+                    print(f"Loaded: {model_name}")
                     return embeddings
                 except Exception as e:
                     print(f"Failed to load {model_name}: {e}")
                 collection_name="code_documentation"
             )
         except Exception as e:
+            logger.error(f"Error initializing vector stores: {str(e)}")
     def load_documents(self, files: List[str], assistant_type: str) -> str:
         try:
             for file_path in files:
                 print(f"Trying to load: {file_path}")
                 try:
                     if file_path.lower().endswith('.pdf'):
+                        loader = UnstructuredPDFLoader(file_path)
                     else:
                         loader = TextLoader(file_path, encoding='utf-8')
                     docs = loader.load()
                     documents.extend(docs)
                 except Exception as e:
+                    print(f"Error loading {file_path}: {e}")
                     continue
             if not documents:
                 return "❌ No documents could be loaded. Please check your file type or content."
             chunks = self.text_splitter.split_documents(documents)
+            print(f"Chunks created: {len(chunks)}")
             for chunk in chunks:
                 chunk.metadata['assistant_type'] = assistant_type
         except Exception as e:
             logger.error(f"Error loading documents: {str(e)}")
+            return f"Error loading documents: {str(e)}"
     def get_learning_tutor_response(self, question: str) -> str:
         try:
             if not self.learning_vectorstore:
+                return "⚠️ Upload learning materials first."
             qa_chain = RetrievalQA.from_chain_type(
                 llm=self.llm,
                 return_source_documents=True
             )
+            prompt = f"""You are an educational assistant. Help the student understand the topic:
+            Question: {question}"""
+            result = qa_chain({"query": prompt})
             response = result['result']
             if result.get('source_documents'):
                     response += f"- {Path(source).name}\n"
             return response
         except Exception as e:
+            logger.error(f"Learning tutor error: {str(e)}")
             return f"❌ Error: {str(e)}"
     def get_code_helper_response(self, question: str) -> str:
         try:
             if not self.code_vectorstore:
+                return "⚠️ Upload code documentation first."
             qa_chain = RetrievalQA.from_chain_type(
                 llm=self.llm,
                 return_source_documents=True
             )
+            prompt = f"""You are a code documentation assistant. Help the developer understand the code:
+            Question: {question}"""
+            result = qa_chain({"query": prompt})
             response = result['result']
             if result.get('source_documents'):
                     response += f"- {Path(source).name}\n"
             return response
         except Exception as e:
+            logger.error(f"Code helper error: {str(e)}")
             return f"❌ Error: {str(e)}"
+# ----------------- Gradio Interface -----------------
 def create_gradio_interface(assistant: RAGAssistant):
     def upload_learning_files(files):
         if not files:
             return "No files uploaded."
+        file_paths = [f.path for f in files]
         return assistant.load_documents(file_paths, "learning")
     def upload_code_files(files):
         if not files:
             return "No files uploaded."
+        file_paths = [f.path for f in files]
         return assistant.load_documents(file_paths, "code")
     def learning_chat(message, history):
         return history, ""
     with gr.Blocks(title="RAG-Based Learning & Code Assistant", theme=gr.themes.Soft()) as demo:
+        gr.Markdown("# 🎓 RAG-Based Learning & Code Assistant")
+        gr.Markdown("Upload your documents and ask intelligent questions.")
         with gr.Tabs():
+            with gr.TabItem("📚 Learning Tutor"):
                 with gr.Row():
                     with gr.Column(scale=1):
                         learning_files = gr.File(label="Upload Materials", file_count="multiple", file_types=[".pdf", ".txt", ".md"])
+                        learning_upload_btn = gr.Button("Upload")
                         learning_status = gr.Textbox(label="Upload Status", interactive=False)
                     with gr.Column(scale=2):
+                        learning_chatbot = gr.Chatbot(label="Tutor Chat", height=400)
+                        learning_input = gr.Textbox(label="Ask a question", placeholder="What is supervised learning?")
+                        learning_submit = gr.Button("Ask")
                 learning_upload_btn.click(upload_learning_files, inputs=[learning_files], outputs=[learning_status])
                 learning_submit.click(learning_chat, inputs=[learning_input, learning_chatbot], outputs=[learning_chatbot, learning_input])
                 learning_input.submit(learning_chat, inputs=[learning_input, learning_chatbot], outputs=[learning_chatbot, learning_input])
+            with gr.TabItem("💻 Code Documentation Helper"):
                 with gr.Row():
                     with gr.Column(scale=1):
+                        code_files = gr.File(label="Upload Code Docs", file_count="multiple", file_types=[".pdf", ".txt", ".md", ".py", ".js", ".json"])
+                        code_upload_btn = gr.Button("Upload")
                         code_status = gr.Textbox(label="Upload Status", interactive=False)
                     with gr.Column(scale=2):
                         code_chatbot = gr.Chatbot(label="Code Chat", height=400)
+                        code_input = gr.Textbox(label="Ask about the codebase", placeholder="How does this API authenticate users?")
+                        code_submit = gr.Button("Ask")
                 code_upload_btn.click(upload_code_files, inputs=[code_files], outputs=[code_status])
                 code_submit.click(code_chat, inputs=[code_input, code_chatbot], outputs=[code_chatbot, code_input])
                 code_input.submit(code_chat, inputs=[code_input, code_chatbot], outputs=[code_chatbot, code_input])
+        gr.Markdown("---")
+        gr.Markdown("🔧 Powered by LangChain, ChromaDB, and Groq")
     return demo
+# ----------------- Main -----------------
 def main():
     load_dotenv()
     groq_api_key = os.getenv("GROQ_API_KEY")
     if not groq_api_key:
+        print("Set your GROQ_API_KEY in the .env file.")
         return
     assistant = RAGAssistant(groq_api_key)
     demo = create_gradio_interface(assistant)
     demo.launch(server_name="0.0.0.0", server_port=7860, share=False, debug=True)
 if __name__ == "__main__":
+    main()