Spaces:

KnockoutNed
/

multiagentic-stat-TA

Sleeping

App Files Files Community

KOkeke94 committed on Jul 29, 2025

Commit

1b7cf63

1 Parent(s): 06a77e0

Fix: Wrap HF pipeline with LangChain, correct imports, remove OpenAI deps

Browse files

Files changed (2) hide show

app.py +25 -40
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -3,57 +3,41 @@ import gradio as gr
 import torch
 from langchain_community.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_community.embeddings import OpenAIEmbeddings
 from langchain_community.vectorstores import FAISS
 from langchain.chains import RetrievalQA
-from langchain_openai import ChatOpenAI
-from transformers.pipelines import pipeline
-# ✅ Load API key from Hugging Face secret
-openai_key = os.environ.get("OPENAI_API_KEY")
-llm = ChatOpenAI(api_key=openai_key, model="gpt-3.5-turbo", temperature=0)
-# ✅ Build RAG agent
 def build_rag_agent(pdf_path):
     loader = PyPDFLoader(pdf_path)
     docs = loader.load()
     splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
     chunks = splitter.split_documents(docs)
-    embeddings = OpenAIEmbeddings(api_key=openai_key)
     vectorstore = FAISS.from_documents(chunks, embeddings)
     retriever = vectorstore.as_retriever()
-    return RetrievalQA.from_chain_type(llm=llm, retriever=retriever, chain_type="stuff")
-# ✅ Load RAG agents
 stat6371_agent = build_rag_agent("PDFs/DS 6371 Syllabus Ver 6.pdf")
 ds7333_agent = build_rag_agent("PDFs/ds-7333_syllabus.pdf")
-# ✅ Load Hugging Face fine-tuned model
-general_stat_agent = pipeline("text2text-generation", model="BivinSadler/llama3-finetuned-Statistics")
-# ✅ Routing logic
-def route_question_llm(question):
-    prompt = f"""
-You are a classification agent that helps route questions to the appropriate expert.
-There are three possible categories:
-A. Stat 6371 (Theoretical statistics course)
-B. DS 7333 (Decision Analytics Course)
-C. General statistics (any other statistics question)
-Classify the following question into one of those three categories by answering only with a single letter: A, B, or C.
-Question: "{question}"
-Answer:"""
-    response = llm.invoke(prompt).content.strip().upper()
-    if response.startswith("A"):
-        return "stat6371"
-    elif response.startswith("B"):
-        return "ds7333"
-    else:
-        return "general"
-# ✅ Writer agent
 def writer_agent(raw_answer, audience="high school students"):
     prompt = f"""
 You are a talented science communicator. Your job is to explain the following answer in a way that is clear, short, and engaging for {audience}.
@@ -63,12 +47,13 @@ Answer:
 Write your response in 2–3 sentences. Avoid technical jargon.
 """
-    return llm.invoke(prompt).content
-# ✅ Main app logic
 def multiagent_system(question):
     print(f"🧭 Routing: {question}")
-    route = route_question_llm(question)
     if route == "stat6371":
         print("🔎 Stat 6371 RAG")
@@ -78,13 +63,13 @@ def multiagent_system(question):
         raw_answer = ds7333_agent.run(question)
     else:
         print("🧠 General Stats HF Agent")
-        result = general_stat_agent(question, max_new_tokens=200, do_sample=False)
         raw_answer = result[0]['generated_text']
     print("✍️ Simplifying...")
     return writer_agent(raw_answer)
-# ✅ Gradio UI
 iface = gr.Interface(
     fn=multiagent_system,
     inputs=gr.Textbox(lines=2, label="Ask a statistics question"),

 import torch
 from langchain_community.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
 from langchain.chains import RetrievalQA
+from langchain_community.llms import HuggingFacePipeline
+from transformers import pipeline
+# ✅ Hugging Face pipelines
+routing_agent = pipeline("text-classification", model="BivinSadler/statistics-routing-agent")
+writer_model = pipeline("text2text-generation", model="BivinSadler/llama3-finetuned-Statistics")
+writer_llm = HuggingFacePipeline(pipeline=writer_model)
+# ✅ RAG Agent Builder
 def build_rag_agent(pdf_path):
     loader = PyPDFLoader(pdf_path)
     docs = loader.load()
     splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
     chunks = splitter.split_documents(docs)
+    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
     vectorstore = FAISS.from_documents(chunks, embeddings)
     retriever = vectorstore.as_retriever()
+    return RetrievalQA.from_chain_type(llm=writer_llm, retriever=retriever, chain_type="stuff")
+# ✅ Load agents
 stat6371_agent = build_rag_agent("PDFs/DS 6371 Syllabus Ver 6.pdf")
 ds7333_agent = build_rag_agent("PDFs/ds-7333_syllabus.pdf")
+# ✅ Routing
+def route_question(question):
+    label = routing_agent(question)[0]["label"]
+    return {
+        "LABEL_0": "stat6371",
+        "LABEL_1": "ds7333"
+    }.get(label, "general")
+# ✅ Writing
 def writer_agent(raw_answer, audience="high school students"):
     prompt = f"""
 You are a talented science communicator. Your job is to explain the following answer in a way that is clear, short, and engaging for {audience}.
 Write your response in 2–3 sentences. Avoid technical jargon.
 """
+    result = writer_model(prompt, max_new_tokens=200, do_sample=False)
+    return result[0]['generated_text']
+# ✅ Core Logic
 def multiagent_system(question):
     print(f"🧭 Routing: {question}")
+    route = route_question(question)
     if route == "stat6371":
         print("🔎 Stat 6371 RAG")
         raw_answer = ds7333_agent.run(question)
     else:
         print("🧠 General Stats HF Agent")
+        result = writer_model(question, max_new_tokens=200, do_sample=False)
         raw_answer = result[0]['generated_text']
     print("✍️ Simplifying...")
     return writer_agent(raw_answer)
+# ✅ Gradio
 iface = gr.Interface(
     fn=multiagent_system,
     inputs=gr.Textbox(lines=2, label="Ask a statistics question"),

requirements.txt CHANGED Viewed

@@ -5,6 +5,7 @@ faiss-cpu
 PyPDF2
 pypdf
 transformers
 gradio
 torch
 tiktoken

 PyPDF2
 pypdf
 transformers
+sentence-transformers
 gradio
 torch
 tiktoken