# app.py — Notebook CI/CD chatbot: a Gradio RAG app answering questions
# about the unified Notebook CI/CD system from scraped GitHub docs.
import gradio as gr
import requests
import os
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_chroma import Chroma
from langchain_core.prompts import PromptTemplate
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
# Scrape the docs from GitHub, rendered to plain text via the Jina Reader proxy.
def load_doc():
    """Fetch every doc page listed in ./new_docs.txt and return their text.

    Each line of new_docs.txt is a path relative to the repo's docs/ folder.
    Every page is fetched through the Jina Reader proxy (r.jina.ai), which
    returns an LLM-friendly plain-text rendering of the page.

    Returns:
        list[str]: the response body text of each requested page.
    """
    base_path = "https://github.com/spacetelescope/notebook-ci-actions/tree/dev-actions-v2-pipeline/docs/"
    # Context manager guarantees the file handle is closed (the original
    # left it open). strip() removes trailing newlines; skipping blank
    # lines avoids a pointless request to the bare base URL.
    with open("./new_docs.txt") as urlsfile:
        urls = [base_path + line.strip() for line in urlsfile if line.strip()]
    pages = []
    for url in urls:
        # A timeout keeps a stuck request from hanging app startup forever.
        pages.append(requests.get("https://r.jina.ai/" + url, timeout=30).text)
    return pages
# Embedding model used to vectorize both the indexed docs and incoming queries.
embeddings = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1")
# Chroma vector store persisted to the working directory; it doubles as the
# retriever feeding {context} into the RAG chain below.
vectorstore = Chroma(
    collection_name="notebook_docs",
    embedding_function=embeddings,
    persist_directory="./",
)
# Scrape and index the docs once at startup (network-bound; see load_doc).
vectorstore.add_texts(load_doc())
retriever = vectorstore.as_retriever()
# RAG prompt: the retrieved docs are injected as {context}, the user's
# question as {question}. (A superseded draft of this prompt was removed.)
template = ("""
You are a GitHub Actions and Jupyter Notebook expert.
Your task is to answer the question **using only the information provided in the context**.
If the context does not contain enough information, clearly state that and, if appropriate,
briefly outline what additional details would be needed to give a complete answer.
Guidelines:
- Focus only on the question. Do not mention the context or that you’re using retrieved text.
- Provide step-by-step, technically accurate explanations and examples where relevant.
- Avoid speculation, guesses, or outdated practices.
- Prefer modern, secure, and well-supported methods.
- Keep the answer concise but complete.
Context:
{context}
Question:
{question}
Answer:
""")
rag_prompt = PromptTemplate.from_template(template)
# Define the LLM. The API key is read from the GOOGLE_API_KEY environment
# variable; low temperature keeps answers close to the retrieved docs.
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash",temperature=0.1, google_api_key=os.environ.get('GOOGLE_API_KEY'))
# LCEL pipeline: the input question is fanned out — sent to the retriever
# (filling {context}) and passed through unchanged (filling {question}) —
# then formatted by the prompt, answered by the LLM, and parsed to a str.
rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | rag_prompt
    | llm
    | StrOutputParser()
)
def rag_memory_stream(message, history):
    """Stream the RAG chain's answer, yielding the growing partial text.

    Gradio's ChatInterface renders each yielded value, so every chunk from
    the chain is appended and the full text-so-far is re-emitted.  The
    `history` argument is supplied by Gradio but unused here.
    """
    chunks = []
    for chunk in rag_chain.stream(message):
        chunks.append(chunk)
        yield "".join(chunks)
# Canned example questions shown as clickable suggestions in the UI.
examples = ['What is the Unified Notebook CI/CD System?', 'How to migrate the old CI to the new CI?']
description = "Real-time AI App to Answer questions about the unified Notebook CI/CD system"
title = "Notebook CI/CD chatbot"
# Chat UI wired to the streaming RAG generator; type="messages" selects the
# openai-style message history format.
demo = gr.ChatInterface(fn=rag_memory_stream,
                        type="messages",
                        title=title,
                        description=description,
                        fill_height=True,
                        examples=examples,
                        theme="glass",
                        )
# Launch the Gradio server only when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()