Update ai_assistant.py
ai_assistant.py  CHANGED  (+61 -39)
@@ -1,12 +1,13 @@
 import os
+import traceback
+from huggingface_hub import login
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from langchain_huggingface import HuggingFacePipeline
 from langchain_community.tools import WikipediaQueryRun, ArxivQueryRun
 from langchain_community.utilities import WikipediaAPIWrapper, ArxivAPIWrapper
-from langchain_huggingface import HuggingFacePipeline
 from langchain.agents import initialize_agent, AgentType
-from huggingface_hub import login
-from langchain_community.llms import LlamaCpp
 
-# ✅
+# ✅ Login to HF Hub
 token = os.getenv("HF_TOKEN")
 print("🔑 HF_TOKEN available?", token is not None)
 if token:
@@ -18,53 +19,74 @@ else:
 def build_qa():
     print("🚀 Starting QA pipeline...")
 
+    try:
+        # ---- TOOLS ----
+        api_wrapper = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=200)
+        wiki = WikipediaQueryRun(api_wrapper=api_wrapper)
 
+        arxiv_wrapper = ArxivAPIWrapper(top_k_results=1, doc_content_chars_max=200)
+        arxiv = ArxivQueryRun(api_wrapper=arxiv_wrapper)
 
+        tools = [wiki, arxiv]
+        print("🔹 Tools initialized:", [type(t).__name__ for t in tools])
 
+        # ---- MODEL ----
+        model_name = "mistralai/Mistral-7B-Instruct-v0.2"  # HF PyTorch checkpoint
 
-    print("🔹 Loading LLaMA.cpp model (CPU optimized)...")
-    llm = LlamaCpp(
-        model_path=model_file,
-        n_ctx=1024,       # smaller context → faster
-        n_threads=8,      # match CPU cores
-        n_gpu_layers=0,   # pure CPU
-        verbose=False
-    )
+        print("🔹 Loading tokenizer...")
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
 
+        print("🔹 Loading model with 8-bit quantization (CPU)...")
+        model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            device_map="auto",        # automatically place layers on CPU
+            load_in_8bit=True,        # 8-bit quantization
+            trust_remote_code=True
+        )
+        print("✅ Model loaded")
 
+        # ---- PIPELINE ----
+        llm_pipeline = pipeline(
+            task="text-generation",
+            model=model,
+            tokenizer=tokenizer,
+            max_new_tokens=256,
+            temperature=0.2,
+            do_sample=False,
+            top_p=0.9,
+            repetition_penalty=1.2,
+            eos_token_id=tokenizer.eos_token_id,
+            return_full_text=False,
+        )
+        hf_llm = HuggingFacePipeline(pipeline=llm_pipeline)
+        print("✅ Pipeline ready")
+
+        # ---- AGENT ----
+        agent = initialize_agent(
+            tools=tools,
+            llm=hf_llm,
+            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
+            verbose=True,
+            handle_parsing_errors=True,
+        )
+        print("✅ Agent initialized")
+
+        return agent
+
+    except Exception as e:
+        print("❌ Failed to build QA pipeline")
+        traceback.print_exc()
+        return None
 
-#
-    agent = build_qa()
-except Exception as e:
-    print("❌ Failed to build QA pipeline:", str(e))
-    agent = None
+# Build pipeline at import
+agent = build_qa()
 
-def get_response(
+def get_response(query: str) -> str:
     if agent is None:
         return "⚠️ QA pipeline not initialized."
     try:
-        return agent.invoke({"input":
+        return agent.invoke({"input": query})
     except Exception as e:
         return f"❌ QA run failed: {e}"
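Two side notes on the new code path, both sketches rather than part of the commit.

First, the quantized load: recent transformers releases deprecate passing load_in_8bit directly to from_pretrained in favor of a BitsAndBytesConfig object, and bitsandbytes' 8-bit kernels target CUDA GPUs, so an 8-bit load on a CPU-only Space may itself fail at runtime. A minimal sketch of the config-object form, assuming a CUDA device and the bitsandbytes package are actually available:

from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Hypothetical alternative load path: route 8-bit quantization
# through BitsAndBytesConfig instead of the deprecated kwarg.
quant_config = BitsAndBytesConfig(load_in_8bit=True)
model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.2",
    device_map="auto",                 # place layers on the available GPU
    quantization_config=quant_config,
)

Second, a minimal smoke test for the module's public entry point. The module name ai_assistant matches the file; the query string is illustrative; the {"input": ..., "output": ...} dict is the standard return shape of a LangChain AgentExecutor:

# smoke_test.py: hypothetical local check; assumes HF_TOKEN is exported
# and ai_assistant.py is on the import path.
from ai_assistant import get_response

result = get_response("Who introduced the transformer architecture?")

# get_response returns the agent's result dict on success
# and a formatted error string on failure.
if isinstance(result, dict):
    print(result.get("output"))
else:
    print(result)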