Spaces: Runtime error
Upload app.py
app.py
ADDED
@@ -0,0 +1,119 @@
# -*- coding: utf-8 -*-
"""Untitled8.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1krY-kSVbf8NSdFeA5eZ_1vvYGLuuSv7I
"""

import os

import gradio as gr
import pandas as pd
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS  # moved out of langchain.vectorstores in recent LangChain releases
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Step 1: Load the System Prompt
prompt_path = "system_prompt.txt"  # Ensure this file is in the same directory
if not os.path.exists(prompt_path):
    raise FileNotFoundError(f"The file '{prompt_path}' is missing. Please upload it to the Space.")

with open(prompt_path, "r") as file:
    system_prompt = file.read()

# Step 2: Load the Retrieval Database
csv_path = "retrievaldb.csv"  # Ensure this file is in the same directory
if not os.path.exists(csv_path):
    raise FileNotFoundError(f"The file '{csv_path}' is missing. Please upload it to the Space.")

df = pd.read_csv(csv_path)

# Step 3: Check the API key up front, so a missing key fails with a clear
# message instead of surfacing later as an embeddings error
openai_api_key = os.getenv("OPENAI_API_KEY")  # Securely access the API key from Hugging Face Secrets
if not openai_api_key:
    raise ValueError("OPENAI_API_KEY environment variable is not set. Please add it to the Space Secrets.")

# Step 4: Preprocess the Data: chunk each row's text and attach its metadata
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
texts = []
metadatas = []

for _, row in df.iterrows():
    chunk_text = row.get("chunk_text", "")
    if pd.notna(chunk_text):
        for chunk in text_splitter.split_text(chunk_text):
            texts.append(chunk)
            metadatas.append({
                "source": row.get("content_source", "Unknown Source"),
                "title": row.get("document_name", "Unknown Document"),
                "page": row.get("page_number", "N/A"),
                "topic": row.get("main_topic", "N/A"),
                "week": row.get("metadata", "N/A"),
            })

if len(texts) != len(metadatas):
    raise ValueError("Mismatch between texts and metadata after preprocessing.")

# Step 5: Create the Vector Store
embeddings = OpenAIEmbeddings(api_key=openai_api_key)
vector_store = FAISS.from_texts(
    texts=texts,
    embedding=embeddings,
    metadatas=metadatas
)

# Step 6: Initialize the LLM
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0.7,
    api_key=openai_api_key
)

# Step 7: Set Up the RetrievalQA Chain
retriever = vector_store.as_retriever(search_kwargs={"k": 5})
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",  # Concatenates retrieved chunks for context
    retriever=retriever,
    return_source_documents=False  # Do not include source documents in the response
)

# Step 8: Define Query Function
def query_bradtgpt(user_input):
    # Add the system prompt dynamically to the query
    full_prompt = f"""
{system_prompt}

User: {user_input}
Assistant:
"""
    # .invoke() replaces the deprecated qa_chain({...}) call style
    response = qa_chain.invoke({"query": full_prompt})
    return response["result"]  # Return the main answer only

# Step 9: Gradio Interface
def respond(message):
    return query_bradtgpt(message)

demo = gr.Interface(
    fn=respond,
    inputs=gr.Textbox(
        label="Your question",
        placeholder="Ask BradGPT anything about CPSC 183!",
        lines=3
    ),
    outputs=gr.Textbox(
        label="Response",
        lines=10
    ),
    title="BradGPT",
    description="Ask BradGPT questions about CPSC 183 course readings or topics.",
    theme="monochrome"
)

if __name__ == "__main__":
    demo.launch()
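Note: the "Runtime error" badge on a Space like this usually points at a startup failure rather than at the Gradio UI. Two common causes for this app: the OPENAI_API_KEY secret is not set in the Space settings, or an imported package is missing from a requirements.txt next to app.py. A plausible requirements.txt for the imports above (the exact package set is an assumption; pin versions to match your environment):

gradio
pandas
langchain
langchain-community
langchain-openai
faiss-cpu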