# Gradio RAG chatbot for the unified Notebook CI/CD documentation.
# (Hugging Face Spaces "Sleeping" status banner removed from this copy.)
import os

import gradio as gr
import requests
from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_huggingface import HuggingFaceEmbeddings
# scrape docs from github using jina
def load_doc():
    """Fetch the documentation pages listed in ./new_docs.txt.

    Each line of new_docs.txt names a file under the repository's docs/
    tree; the full GitHub URL is fetched through the r.jina.ai reader
    proxy, which returns the rendered page as plain text.

    Returns:
        list[str]: the raw text of each fetched page, in file order.
    """
    base_path = "https://github.com/spacetelescope/notebook-ci-actions/tree/dev-actions-v2-pipeline/docs/"
    # 'with' guarantees the listing file is closed (the original leaked the handle);
    # strip() also drops stray surrounding whitespace, not just the newline.
    with open("./new_docs.txt") as urlsfile:
        urls = [base_path + line.strip() for line in urlsfile]
    pages = []
    for url in urls:
        # Prefix with the jina.ai reader so we get clean text, not raw HTML.
        # A timeout keeps one dead URL from hanging startup forever.
        response = requests.get("https://r.jina.ai/" + url, timeout=60)
        pages.append(response.text)
    return pages
# embed to convert to tokens
# Embedding model used for both the scraped docs and incoming user queries.
embeddings = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1")
# define the vector store and use it as retriever!
# Chroma collection persisted in the working directory. Note: load_doc() is
# called on every startup, so the docs are re-fetched and re-added each run.
vectorstore = Chroma(
    collection_name="notebook_docs",
    embedding_function=embeddings,
    persist_directory="./",
)
vectorstore.add_texts(load_doc())
# Retriever over the collection with default search settings.
retriever = vectorstore.as_retriever()
# RAG prompt
# Template for the RAG chain: {context} is filled with the retrieved
# documentation text and {question} with the raw user message.
# (An earlier, superseded draft of this prompt was removed.)
template = """
You are a GitHub Actions and Jupyter Notebook expert.
Your task is to answer the question **using only the information provided in the context**.
If the context does not contain enough information, clearly state that and, if appropriate,
briefly outline what additional details would be needed to give a complete answer.
Guidelines:
- Focus only on the question. Do not mention the context or that you’re using retrieved text.
- Provide step-by-step, technically accurate explanations and examples where relevant.
- Avoid speculation, guesses, or outdated practices.
- Prefer modern, secure, and well-supported methods.
- Keep the answer concise but complete.
Context:
{context}
Question:
{question}
Answer:
"""
rag_prompt = PromptTemplate.from_template(template)
# define the LLM
# Gemini flash model at low temperature for mostly-deterministic answers.
# Requires GOOGLE_API_KEY in the environment; if unset, .get() passes None
# and the failure surfaces at request time rather than here.
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash",temperature=0.1, google_api_key=os.environ.get('GOOGLE_API_KEY'))
# LCEL pipeline: the dict fans the input out — retriever fetches context
# while RunnablePassthrough forwards the question unchanged — then the
# prompt is filled, the LLM called, and the reply reduced to a plain string.
rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | rag_prompt
    | llm
    | StrOutputParser()
)
def rag_memory_stream(message, history):
    """Stream the RAG chain's answer, yielding the growing text chunk by chunk.

    Gradio's ChatInterface re-renders on every yield, so emitting the
    accumulated answer (not the delta) produces a typewriter effect.
    `history` is required by the callback signature but is unused.
    """
    answer_so_far = ""
    for chunk in rag_chain.stream(message):
        answer_so_far = answer_so_far + chunk
        yield answer_so_far
# Clickable sample questions shown in the chat UI.
examples = ['What is the Unified Notebook CI/CD System?', 'How to migrate the old CI to the new CI?']
description = "Real-time AI App to Answer questions about the unified Notebook CI/CD system"
title = "Notebook CI/CD chatbot"
# Chat UI wired to the streaming RAG callback; type="messages" selects the
# openai-style {role, content} history format expected by the callback.
demo = gr.ChatInterface(fn=rag_memory_stream,
                        type="messages",
                        title=title,
                        description=description,
                        fill_height=True,
                        examples=examples,
                        theme="glass",
                        )
if __name__ == "__main__":
    demo.launch()