Spaces:

KnockoutNed
/

multiagentic-stat-TA

Sleeping

App Files Community

KOkeke94 committed on Jul 29, 2025

Commit

297f3ae

1 Parent(s): a2af92f

Fix: add missing torch import for HF pipeline

Browse files

Files changed (1) hide show

app.py +48 -45

app.py CHANGED Viewed

@@ -1,79 +1,83 @@
 import os
 import gradio as gr
-from transformers.pipelines import pipeline
-from langchain_community.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_community.embeddings import HuggingFaceEmbeddings
-from langchain_community.vectorstores import FAISS
 from langchain.chains import RetrievalQA
-from langchain_community.llms import HuggingFacePipeline
-# ✅ Load Hugging Face LLM (LLama 3 fine-tuned model)
-llm_pipeline = pipeline("text2text-generation", model="BivinSadler/llama3-finetuned-Statistics", max_length=512)
-llm = HuggingFacePipeline(pipeline=llm_pipeline)
-# ✅ Create embeddings for RAG
-embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
-# ✅ Build RAG Agent Function
 def build_rag_agent(pdf_path):
     loader = PyPDFLoader(pdf_path)
     docs = loader.load()
     splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
     chunks = splitter.split_documents(docs)
-    vectorstore = FAISS.from_documents(chunks, embedding_model)
     retriever = vectorstore.as_retriever()
     return RetrievalQA.from_chain_type(llm=llm, retriever=retriever, chain_type="stuff")
-# ✅ Create RAG agents for both syllabi
 stat6371_agent = build_rag_agent("PDFs/DS 6371 Syllabus Ver 6.pdf")
 ds7333_agent = build_rag_agent("PDFs/ds-7333_syllabus.pdf")
-# ✅ Writer Agent (makes answers easier)
-def writer_agent(raw_answer, audience="high school students"):
     prompt = f"""
-You are a skilled teacher explaining to {audience}. Simplify the following answer in 2–3 short, clear sentences:
-Answer:
-{raw_answer}
-"""
-    result = llm_pipeline(prompt, max_length=200, do_sample=False)
-    return result[0]['generated_text']
-# ✅ Question Routing Agent (classifies the question)
-def route_question(question):
-    routing_prompt = f"""
-You are a routing agent. Classify the question into one of:
-A. Stat 6371
-B. DS 7333
-C. General statistics
 Question: "{question}"
-Answer with only A, B, or C.
-"""
-    result = llm_pipeline(routing_prompt, max_new_tokens=30, do_sample=False)
-    route = result[0]['generated_text'].strip().upper()
-    if route.startswith("A"):
         return "stat6371"
-    elif route.startswith("B"):
         return "ds7333"
     else:
         return "general"
-# ✅ Multi-Agent Pipeline
 def multiagent_system(question):
-    print(f"\n🧭 Routing: {question}")
-    route = route_question(question)
     if route == "stat6371":
-        print("🔎 Stat 6371 Agent")
         raw_answer = stat6371_agent.run(question)
     elif route == "ds7333":
-        print("🔎 DS 7333 Agent")
         raw_answer = ds7333_agent.run(question)
     else:
         print("🧠 General Stats HF Agent")
-        result = llm_pipeline(question, max_length=200, do_sample=False)
         raw_answer = result[0]['generated_text']
     print("✍️ Simplifying...")
@@ -84,9 +88,8 @@ iface = gr.Interface(
     fn=multiagent_system,
     inputs=gr.Textbox(lines=2, label="Ask a statistics question"),
     outputs=gr.Textbox(label="Answer"),
-    title="📊 Multi-Agent Statistics Assistant (HuggingFace)",
-    description="Routes your stats question to the right syllabus (Stat 6371, DS 7333) or uses a general statistics model (LLama3)."
 )
-if __name__ == "__main__":
-    iface.launch()

 import os
 import gradio as gr
+import torch
+from langchain.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.embeddings import OpenAIEmbeddings
+from langchain.vectorstores import FAISS
 from langchain.chains import RetrievalQA
+from langchain.chat_models import ChatOpenAI
+from transformers.pipelines import pipeline
+# ✅ Load API key from environment variable (set in Hugging Face Secrets)
+openai_key = os.environ.get("OPENAI_API_KEY")
+llm = ChatOpenAI(openai_api_key=openai_key, model_name="gpt-3.5-turbo", temperature=0)
+# ✅ Build RAG agent
 def build_rag_agent(pdf_path):
     loader = PyPDFLoader(pdf_path)
     docs = loader.load()
     splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
     chunks = splitter.split_documents(docs)
+    vectorstore = FAISS.from_documents(chunks, OpenAIEmbeddings(openai_api_key=openai_key))
     retriever = vectorstore.as_retriever()
     return RetrievalQA.from_chain_type(llm=llm, retriever=retriever, chain_type="stuff")
+# ✅ Load RAG agents
 stat6371_agent = build_rag_agent("PDFs/DS 6371 Syllabus Ver 6.pdf")
 ds7333_agent = build_rag_agent("PDFs/ds-7333_syllabus.pdf")
+# ✅ Load HF fine-tuned model for general stats
+general_stat_agent = pipeline("text2text-generation", model="BivinSadler/llama3-finetuned-Statistics")
+# ✅ Routing agent
+def route_question_llm(question):
     prompt = f"""
+You are a classification agent that helps route questions to the appropriate expert.
+There are three possible categories:
+A. Stat 6371 (Theoretical statistics course)
+B. DS 7333 (Decision Analytics Course)
+C. General statistics (any other statistics question)
+Classify the following question into one of those three categories by answering only with a single letter: A, B, or C.
 Question: "{question}"
+Answer:"""
+    route_response = llm.invoke(prompt).content.strip().upper()
+    if route_response.startswith("A"):
         return "stat6371"
+    elif route_response.startswith("B"):
         return "ds7333"
     else:
         return "general"
+# ✅ Writer agent
+def writer_agent(raw_answer, audience="high school students"):
+    prompt = f"""
+You are a talented science communicator. Your job is to explain the following answer in a way that is clear, short, and engaging for {audience}.
+Answer:
+{raw_answer}
+Write your response in 2–3 sentences. Avoid technical jargon.
+"""
+    return llm.invoke(prompt).content
+# ✅ Multi-agent logic
 def multiagent_system(question):
+    print(f"🧭 Routing: {question}")
+    route = route_question_llm(question)
     if route == "stat6371":
+        print("🔎 Stat 6371 RAG")
         raw_answer = stat6371_agent.run(question)
     elif route == "ds7333":
+        print("🔎 DS 7333 RAG")
         raw_answer = ds7333_agent.run(question)
     else:
         print("🧠 General Stats HF Agent")
+        result = general_stat_agent(question, max_new_tokens=200, do_sample=False)
         raw_answer = result[0]['generated_text']
     print("✍️ Simplifying...")
     fn=multiagent_system,
     inputs=gr.Textbox(lines=2, label="Ask a statistics question"),
     outputs=gr.Textbox(label="Answer"),
+    title="📊 Multi-Agent Statistics Assistant",
+    description="Routes your stats question to the right syllabus (Stat 6371, DS 7333) or uses a general statistics model (Llama3)."
 )
+iface.launch()