Subha95 committed on
Commit
fcbada7
·
verified ·
1 Parent(s): 4b022c1

Update ai_assistant.py

Browse files
Files changed (1) hide show
  1. ai_assistant.py +30 -39
ai_assistant.py CHANGED
@@ -1,60 +1,48 @@
1
- import os
2
- from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
3
- import torch
4
  from langchain_community.tools import WikipediaQueryRun, ArxivQueryRun
5
  from langchain_community.utilities import WikipediaAPIWrapper, ArxivAPIWrapper
6
  from langchain_huggingface import HuggingFacePipeline
7
  from langchain.agents import initialize_agent, AgentType
 
 
 
 
8
 
9
- # ✅ Set your HF token in environment
10
- HF_TOKEN = os.getenv("HF_TOKEN")
11
- if not HF_TOKEN:
12
- raise ValueError("Please set HF_TOKEN in your environment.")
13
 
14
- # ---- Build the agent ----
15
  def build_qa():
16
- # ---- Tools ----
17
- wiki_wrapper = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=200)
18
- wiki_tool = WikipediaQueryRun(api_wrapper=wiki_wrapper)
19
 
20
  arxiv_wrapper = ArxivAPIWrapper(top_k_results=1, doc_content_chars_max=200)
21
- arxiv_tool = ArxivQueryRun(api_wrapper=arxiv_wrapper)
22
 
23
- tools = [wiki_tool, arxiv_tool]
24
 
25
- # ---- Model ----
26
- model_name = "mistralai/Mistral-7B-Instruct-v0.3" # HF repo
27
  tokenizer = AutoTokenizer.from_pretrained(model_name)
 
28
 
29
- # Load the model in **FP16 if possible** and then apply **dynamic quantization** for CPU
30
- model = AutoModelForCausalLM.from_pretrained(
31
- model_name,
32
- device_map="cpu",
33
- )
34
-
35
- # ⚡ Apply dynamic quantization (CPU only)
36
- model = torch.quantization.quantize_dynamic(
37
- model, {torch.nn.Linear}, dtype=torch.qint8
38
- )
39
-
40
- # ---- HuggingFace pipeline ----
41
- llm_pipeline = pipeline(
42
- "text-generation",
43
  model=model,
44
  tokenizer=tokenizer,
 
45
  max_new_tokens=256,
46
  temperature=0.2,
47
  do_sample=False,
48
  top_p=0.9,
49
  repetition_penalty=1.2,
50
- device=-1, # CPU
51
- return_full_text=False,
52
  )
53
 
54
- # ---- Wrap in LangChain HuggingFacePipeline ----
55
- hf_llm = HuggingFacePipeline(pipeline=llm_pipeline)
56
 
57
- # ---- Initialize Agent ----
58
  agent = initialize_agent(
59
  tools=tools,
60
  llm=hf_llm,
@@ -65,8 +53,11 @@ def build_qa():
65
 
66
  return agent
67
 
68
- # ---- Example usage ----
69
- if __name__ == "__main__":
70
- agent = build_qa()
71
- response = agent.invoke({"input": "What is query, key, value in attention mechanism?"})
72
- print("\n🤖 Answer:", response)
 
 
 
 
1
+ # ai_assistant.py
 
 
2
  from langchain_community.tools import WikipediaQueryRun, ArxivQueryRun
3
  from langchain_community.utilities import WikipediaAPIWrapper, ArxivAPIWrapper
4
  from langchain_huggingface import HuggingFacePipeline
5
  from langchain.agents import initialize_agent, AgentType
6
+ import os
7
+ from huggingface_hub import login
8
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextGenerationPipeline
9
+ import torch
10
 
11
+ # HF token login
12
+ token = os.getenv("HF_TOKEN")
13
+ if token:
14
+ login(token=token)
15
 
16
+ # build agent
17
  def build_qa():
18
+ api_wrapper = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=200)
19
+ wiki = WikipediaQueryRun(api_wrapper=api_wrapper)
 
20
 
21
  arxiv_wrapper = ArxivAPIWrapper(top_k_results=1, doc_content_chars_max=200)
22
+ arxiv = ArxivQueryRun(api_wrapper=arxiv_wrapper)
23
 
24
+ tools = [wiki, arxiv]
25
 
26
+ # Load model
27
+ model_name = "mistralai/Mistral-7B-Instruct-v0.3"
28
  tokenizer = AutoTokenizer.from_pretrained(model_name)
29
+ model = AutoModelForCausalLM.from_pretrained(model_name)
30
 
31
+ llm = TextGenerationPipeline(
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  model=model,
33
  tokenizer=tokenizer,
34
+ task="text-generation",
35
  max_new_tokens=256,
36
  temperature=0.2,
37
  do_sample=False,
38
  top_p=0.9,
39
  repetition_penalty=1.2,
40
+ eos_token_id=tokenizer.eos_token_id,
41
+ return_full_text=False
42
  )
43
 
44
+ hf_llm = HuggingFacePipeline(pipeline=llm)
 
45
 
 
46
  agent = initialize_agent(
47
  tools=tools,
48
  llm=hf_llm,
 
53
 
54
  return agent
55
 
56
+ # Define get_response function
57
+ _agent_instance = None
58
+ def get_response(user_input: str):
59
+ global _agent_instance
60
+ if _agent_instance is None:
61
+ _agent_instance = build_qa()
62
+ result = _agent_instance.invoke({"input": user_input})
63
+ return result