# NOTE(review): the three lines that preceded the file header ("Spaces:" and two
# "Runtime error" lines) were Hugging Face Spaces build-log residue, not source
# code — removed so the file parses.
# -*- coding: utf-8 -*-
"""Untitled8.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1krY-kSVbf8NSdFeA5eZ_1vvYGLuuSv7I
"""
import os

import gradio as gr
import pandas as pd
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
# Step 5: Retrieve the OpenAI API key from the Space secret named "tauhid".
openai_api_key = os.getenv("tauhid")
if not openai_api_key:
    # Fail fast with a clear message instead of the opaque TypeError that
    # os.environ[...] = None would raise below.
    raise EnvironmentError(
        "OpenAI API key not found: set the 'tauhid' secret/environment variable."
    )
# Confirm presence without echoing any part of the secret into the logs.
print("API key retrieved: [FOUND]")
# Downstream langchain components read the key from this standard variable.
os.environ["OPENAI_API_KEY"] = openai_api_key
# NOTE: the embeddings object is created later (just before the vector store
# is built), so no OpenAIEmbeddings instance is needed here.
# Step 1: Load the system prompt that frames every user query.
prompt_path = "system_prompt.txt"  # Ensure this file is in the same directory
try:
    # EAFP: open directly instead of exists-then-open (avoids the race between
    # the check and the read). Explicit UTF-8 so the prompt decodes the same
    # way regardless of the host's default locale encoding.
    with open(prompt_path, "r", encoding="utf-8") as file:
        system_prompt = file.read()
except FileNotFoundError:
    # Preserve the original, Space-specific guidance for the operator.
    raise FileNotFoundError(
        f"The file '{prompt_path}' is missing. Please upload it to the Space."
    ) from None
# Step 2: Load the retrieval database (one chunkable text row per record).
csv_path = "retrievaldb.csv"  # Ensure this file is in the same directory
try:
    # EAFP: pd.read_csv raises FileNotFoundError itself, so the separate
    # os.path.exists check was redundant and race-prone.
    df = pd.read_csv(csv_path)
except FileNotFoundError:
    # Preserve the original, Space-specific guidance for the operator.
    raise FileNotFoundError(
        f"The file '{csv_path}' is missing. Please upload it to the Space."
    ) from None
# Step 3: Split each row's text into overlapping chunks and carry row-level
# metadata alongside every chunk (texts[i] pairs with metadatas[i]).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
texts = []
metadatas = []
for _, row in df.iterrows():
    raw_text = row.get("chunk_text", "")
    if pd.isna(raw_text):
        continue  # skip rows with a missing/NaN text cell
    # str() coercion: pandas may parse a numeric-looking column as float/int,
    # and split_text requires a string. For str cells this is a no-op.
    for chunk in text_splitter.split_text(str(raw_text)):
        texts.append(chunk)
        metadatas.append({
            # .get with a default tolerates columns absent from the CSV.
            "source": row.get("content_source", "Unknown Source"),
            "title": row.get("document_name", "Unknown Document"),
            "page": row.get("page_number", "N/A"),
            "topic": row.get("main_topic", "N/A"),
            "week": row.get("metadata", "N/A"),
        })
# Sanity check: the two lists must stay index-aligned for FAISS.from_texts.
if len(texts) != len(metadatas):
    raise ValueError("Mismatch between texts and metadata after preprocessing.")
# Step 4: Embed every chunk and index it in an in-memory FAISS store.
# OpenAIEmbeddings picks up the key from the OPENAI_API_KEY env var set above.
embeddings = OpenAIEmbeddings()
vector_store = FAISS.from_texts(texts=texts, embedding=embeddings, metadatas=metadatas)
# Initialize the chat model used to answer over the retrieved context.
llm = ChatOpenAI(
    model_name="gpt-4o-mini",
    temperature=0.7,  # mild creativity; lower for more deterministic answers
    api_key=openai_api_key,
)
# NOTE(review): the original code instantiated a second OpenAIEmbeddings here,
# after the vector store was already built — the object was never used, so the
# redundant (and misleading) re-creation has been removed.
# Step 6: Wire retrieval and generation into a single question-answering chain.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",  # "stuff" = concatenate all retrieved chunks into one context
    # Top-5 nearest chunks per query, straight from the FAISS index.
    retriever=vector_store.as_retriever(search_kwargs={"k": 5}),
    return_source_documents=False,  # answer text only, no source attachments
)
# Step 7: Query helper.
def query_bradtgpt(user_input):
    """Answer one user question via the RetrievalQA chain.

    The system prompt is prepended to every query so the chain sees the
    course persona/instructions alongside the user's message. Returns the
    chain's answer text only.
    """
    framed_query = f"""
{system_prompt}
User: {user_input}
Assistant:
"""
    chain_output = qa_chain({"query": framed_query})
    return chain_output["result"]
# Step 8: Gradio Interface
def respond(message):
    """Gradio callback: forward the incoming message to the QA helper."""
    return query_bradtgpt(message)
# Build the web UI: one question box in, one response box out.
question_input = gr.Textbox(
    label="Your question",
    placeholder="Ask BradGPT anything about CPSC 183!",
    lines=3,
)
answer_output = gr.Textbox(label="Response", lines=10)

demo = gr.Interface(
    fn=respond,
    inputs=question_input,
    outputs=answer_output,
    title="BradGPT",
    description="Ask BradGPT questions about CPSC 183 course readings or topics.",
    theme="monochrome",
)

# Launch only when run as a script (Spaces executes this module directly).
if __name__ == "__main__":
    demo.launch()