Spaces:

adamfallon
/

Trainline-Train-Times

Runtime error

App Files Files Community

Adam Fallon committed on Oct 19, 2023

Commit

6f732f7

1 Parent(s): 0e8964f

simplify and add description

Browse files

Files changed (1) hide show

app.py +71 -16

app.py CHANGED Viewed

@@ -8,13 +8,14 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.llms import HuggingFaceHub
 from tl_loaders.TrainlineTrainTimeLoader import TrainlineTrainTimeLoader
 from langchain.chains import RetrievalQA
 import gradio as gr
 from dotenv import load_dotenv
 load_dotenv()
 embedding_model = "sentence-transformers/all-mpnet-base-v2"
-persist_directory = "docs/chroma/openai"
 chunk_size = 1000
 chunk_overlap = 0
@@ -33,7 +34,7 @@ greet = "Ask a question in the like 'How many trains per day from Rome to Madrid
 newline = "\n"
 llm = None
 db = None
-force_reindex = True
 chat_history = []
 # Few random ones and top results from https://www.thetrainline.com/train-times
@@ -56,14 +57,33 @@ urls = {
 }
-def ask_question(message, history):
     qa = RetrievalQA.from_chain_type(
         llm=llm,
         chain_type="stuff",
         retriever=db.as_retriever(search_kwargs=search_kwargs),
         return_source_documents=True,
     )
     # result = qa(f"{newline.join(chat_history)}\n[INST]{message}[/INST]")
     result = qa(f"[INST]{message}[/INST]")
@@ -77,26 +97,61 @@ def ask_question(message, history):
     else:
         try:
             source = result["source_documents"][0].metadata["source"]
-            return f"{answer}\n[Source]({source})"
         except:
             return f"{answer}"
 def setup_gradio():
-    demo = gr.ChatInterface(
-        fn=ask_question,
-        examples=[
-            "Trains per day from London to Edinburgh?",
-            "When is the last train from Madrid to Barcelona?",
-            "Train and bus operators from Rome to Madrid?",
-            "How many changes from Barcelona to Madrid?",
-            "Price from London to Madrid?",
-        ],
-        title="Trainline Q & A 🤖",
-        description=f"Ask questions about routes. Supported routes: {', '.join(urls.values())}",
     )
-    demo.launch()
 def load_docs():
     loader = TrainlineTrainTimeLoader(list(urls.keys()), urls_to_od_pair=urls)

 from langchain.llms import HuggingFaceHub
 from tl_loaders.TrainlineTrainTimeLoader import TrainlineTrainTimeLoader
 from langchain.chains import RetrievalQA
+from langchain.prompts import PromptTemplate
 import gradio as gr
 from dotenv import load_dotenv
 load_dotenv()
 embedding_model = "sentence-transformers/all-mpnet-base-v2"
+persist_directory = "docs/chroma/"
 chunk_size = 1000
 chunk_overlap = 0
 newline = "\n"
 llm = None
 db = None
+force_reindex = False
 chat_history = []
 # Few random ones and top results from https://www.thetrainline.com/train-times
 }
+def ask_question(message):
+    prompt = """
+        Use the following pieces of context to answer the question at the end.
+        If you don't know the answer, just say that you don't know, don't try to make up an answer.
+        DO NOT RAMBLE or try to infer information.
+        Just give the exact information requested.
+        Take a deep breath and work on this problem step-by-step.
+        {context}
+        Question: {question}
+    """
     qa = RetrievalQA.from_chain_type(
         llm=llm,
         chain_type="stuff",
         retriever=db.as_retriever(search_kwargs=search_kwargs),
         return_source_documents=True,
+        chain_type_kwargs={
+            "prompt": PromptTemplate(
+                template=prompt,
+                input_variables=["context", "question"],
+            ),
+        },
     )
+    print(qa.combine_documents_chain.llm_chain.prompt.template)
     # result = qa(f"{newline.join(chat_history)}\n[INST]{message}[/INST]")
     result = qa(f"[INST]{message}[/INST]")
     else:
         try:
             source = result["source_documents"][0].metadata["source"]
+            doc = result["source_documents"][0].page_content
+            return f"{answer}\n---\nInformation found in text: {doc}\nSource: {source}"
         except:
             return f"{answer}"
 def setup_gradio():
+    desc = "Welcome to Trainline Train Time Q&A! A silly little demo of how RAG can ground the answers from an LLM."
+    long_desc = """
+    Ask a question in the like 'How many trains per day from Rome to Madrid'.
+    This info is scrapped from the table on train time pages. Example page here: https://www.thetrainline.com/train-times/manchester-to-london
+    Not all train time pages have been scrapped so you don't get answers for every route, just the ones in the supported routes below and only answers for the supported questions.
+    Supported questions are;
+    - Price from X to Y
+    - Last train from X to Y
+    - First train from X to Y
+    - Frequency of trains from X to Y
+    - Price of trains from X to Y
+    - Operators of trains and buses from X to Y
+    - Distance from X to Y
+    - Number of Changes from X to Y
+    - Journey time from X to Y
+    Supported routes are;
+    - "London to Edinburgh",
+    - "Madrid to Barcelona",
+    - "Rome to Madrid",
+    - "Barcelona to Madrid",
+    - "London to Madrid",
+    - "London to Manchester",
+    - "Leeds to London",
+    - "London to Birmingham",
+    - "London to Brighton",
+    - "Glasgow to Manchester",
+    - "Glasgow to Liverpool",
+    - "Glasgow to Leeds",
+    - "Birmingham to Glasgow",
+    - "London to Newcastle",
+    - "Seville to Madrid",
+    """
+    iface = gr.Interface(
+        ask_question,
+        inputs="text",
+        outputs="text",
+        allow_screenshot=False,
+        allow_flagging=False,
+        description=desc,
+        article=long_desc
     )
+    iface.launch()
 def load_docs():
     loader = TrainlineTrainTimeLoader(list(urls.keys()), urls_to_od_pair=urls)