# Kaggle Study Assistant — Gradio app (Hugging Face Space).
# Upload Kaggle .pdf / .ipynb files and ask questions answered via a
# FAISS-backed RetrievalQA chain using a Groq-hosted LLM.
import os

import gradio as gr
import nbformat
from langchain.chains import RetrievalQA
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq
# Load the Groq API key from the environment if available.
# NOTE: the original wrote `os.environ[...] = os.getenv(...)` unconditionally,
# which raises TypeError at import time when GROQ_API_KEY is unset, because
# os.environ values must be strings (os.getenv returns None on a miss).
_groq_api_key = os.getenv("GROQ_API_KEY")
if _groq_api_key is not None:
    os.environ["GROQ_API_KEY"] = _groq_api_key
# Helper: Read .ipynb file and extract text
def load_ipynb(file):
    """Extract markdown and code cell text from an uploaded .ipynb file.

    `file` is a Gradio upload object exposing the temp path via `.name`
    (presumably — confirm against the gr.File component used below).

    Returns a single-element list of LangChain ``Document`` objects, or an
    empty list on any read/parse failure (best-effort, matches caller's
    expectation of a doc list it can ``extend`` with).
    """
    try:
        with open(file.name, "r", encoding="utf-8") as f:
            nb = nbformat.read(f, as_version=nbformat.NO_CONVERT)
        parts = []
        for cell in nb.cells:
            if cell.cell_type in ("markdown", "code"):
                src = cell.source
                # NOTE(review): nbformat normally gives `source` as a str, but
                # unconverted legacy notebooks may carry a list of lines.
                if isinstance(src, list):
                    src = "".join(src)
                parts.append(src)
        # join once instead of repeated `+=` string concatenation
        return [Document(page_content="\n\n".join(parts) + "\n\n")]
    except Exception as e:
        # Deliberate best-effort: a bad notebook should not abort the batch.
        print("Error loading .ipynb:", e)
        return []
# Helper: Read PDF or IPYNB and build retriever chain
def process_files(files):
    """Load the uploaded files and build a RetrievalQA chain over them.

    Supported inputs are ``.pdf`` (via PyPDFLoader) and ``.ipynb`` (via
    ``load_ipynb``); files with any other extension are silently skipped.

    Returns the RetrievalQA chain, or ``None`` on any failure (callers
    treat ``None`` as the error signal).
    """
    try:
        all_docs = []
        for file in files:
            if file.name.endswith(".pdf"):
                loader = PyPDFLoader(file.name)
                all_docs.extend(loader.load())
            elif file.name.endswith(".ipynb"):
                all_docs.extend(load_ipynb(file))
        if not all_docs:
            # No readable documents (empty upload or only unsupported types):
            # fail early instead of handing an empty list to FAISS.
            print("Error in processing files: no readable documents found")
            return None
        splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        chunks = splitter.split_documents(all_docs)
        embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        vectorstore = FAISS.from_documents(chunks, embeddings)
        retriever = vectorstore.as_retriever()
        llm = ChatGroq(model_name="llama3-70b-8192", temperature=0)
        qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
        return qa_chain
    except Exception as e:
        print("Error in processing files:", e)
        return None
# Global chain, rebuilt every time a new batch of files is processed.
qa_chain = None


def upload_docs(files):
    """Build the QA chain from the uploaded files and report UI status."""
    global qa_chain
    qa_chain = process_files(files)
    if qa_chain is not None:
        return "β PDFs or Notebooks uploaded and processed. Now ask your questions."
    return "β Error processing files. Please make sure the file format is correct."
def ask_question(query):
    """Answer a user question against the processed documents, if any."""
    if qa_chain is None:
        return "β Please upload PDFs or Kaggle Notebooks first."
    try:
        return qa_chain.run(query)
    except Exception as err:
        return f"β Error answering question: {err}"
# ---- Gradio UI ----
with gr.Blocks() as app:
    gr.Markdown(
        "## π€ Kaggle Study Assistant\nUpload Kaggle `.pdf` or `.ipynb` files and ask intelligent questions."
    )
    with gr.Row():
        file_input = gr.File(
            file_types=[".pdf", ".ipynb"],
            file_count="multiple",
            label="Upload Kaggle Files",
        )
        process_btn = gr.Button("π₯ Process Files")
    status_box = gr.Textbox(label="Upload Status")
    process_btn.click(fn=upload_docs, inputs=file_input, outputs=status_box)

    question_box = gr.Textbox(label="Ask a question about uploaded notebooks")
    answer_box = gr.Textbox(label="Assistant Answer", interactive=False)
    question_box.submit(fn=ask_question, inputs=question_box, outputs=answer_box)

app.launch()