Subha95 committed on
Commit
f605107
Β·
verified Β·
1 Parent(s): d4ca553

Update ai_assistant.py

Browse files
Files changed (1) hide show
  1. ai_assistant.py +61 -39
ai_assistant.py CHANGED
@@ -1,12 +1,13 @@
1
  import os
 
 
 
 
2
  from langchain_community.tools import WikipediaQueryRun, ArxivQueryRun
3
  from langchain_community.utilities import WikipediaAPIWrapper, ArxivAPIWrapper
4
- from langchain_huggingface import HuggingFacePipeline
5
  from langchain.agents import initialize_agent, AgentType
6
- from huggingface_hub import login
7
- from langchain_community.llms import LlamaCpp
8
 
9
- # βœ… Hugging Face login
10
  token = os.getenv("HF_TOKEN")
11
  print("πŸ”‘ HF_TOKEN available?", token is not None)
12
  if token:
@@ -18,53 +19,74 @@ else:
18
  def build_qa():
19
  print("πŸš€ Starting QA pipeline...")
20
 
21
- # ---- Tools ----
22
- wiki_wrapper = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=200)
23
- wiki = WikipediaQueryRun(api_wrapper=wiki_wrapper)
 
24
 
25
- arxiv_wrapper = ArxivAPIWrapper(top_k_results=1, doc_content_chars_max=200)
26
- arxiv = ArxivQueryRun(api_wrapper=arxiv_wrapper)
27
 
28
- tools = [wiki, arxiv]
 
29
 
30
- # ---- Model ----
31
- model_file = "mistral-7b-instruct-v0.2.Q4_K_M.gguf" # quantized GGUF
32
- print("πŸ”Ή Loading LLaMA.cpp model (CPU optimized)...")
33
- llm = LlamaCpp(
34
- model_path=model_file,
35
- n_ctx=1024, # smaller context β†’ faster
36
- n_threads=8, # match CPU cores
37
- n_gpu_layers=0, # pure CPU
38
- verbose=False
39
- )
40
 
41
- # ---- Wrap for LangChain Agent ----
42
- hf_llm = HuggingFacePipeline(pipeline=llm)
43
 
44
- agent = initialize_agent(
45
- tools=tools,
46
- llm=hf_llm,
47
- agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
48
- verbose=True,
49
- handle_parsing_errors=True
50
- )
 
51
 
52
- print("βœ… QA pipeline ready.")
53
- return agent
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
 
56
- # ---- Initialize once ----
57
- try:
58
- agent = build_qa()
59
- except Exception as e:
60
- print("❌ Failed to build QA pipeline:", str(e))
61
- agent = None
62
 
63
 
64
- def get_response(message, history=None):
65
  if agent is None:
66
  return "⚠️ QA pipeline not initialized."
67
  try:
68
- return agent.invoke({"input": message})
69
  except Exception as e:
70
  return f"❌ QA run failed: {e}"
 
1
  import os
2
+ import traceback
3
+ from huggingface_hub import login
4
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
5
+ from langchain_huggingface import HuggingFacePipeline
6
  from langchain_community.tools import WikipediaQueryRun, ArxivQueryRun
7
  from langchain_community.utilities import WikipediaAPIWrapper, ArxivAPIWrapper
 
8
  from langchain.agents import initialize_agent, AgentType
 
 
9
 
10
+ # βœ… Login to HF Hub
11
  token = os.getenv("HF_TOKEN")
12
  print("πŸ”‘ HF_TOKEN available?", token is not None)
13
  if token:
 
19
def build_qa():
    """Build and return a LangChain ReAct agent backed by a local Mistral model.

    Initializes the Wikipedia/Arxiv tools, loads the Mistral-7B-Instruct
    checkpoint and tokenizer, wraps them in a text-generation pipeline, and
    assembles a zero-shot ReAct agent.

    Returns:
        The initialized agent, or None if any step fails (callers are
        expected to check for None before use).
    """
    print("πŸš€ Starting QA pipeline...")

    try:
        # ---- TOOLS ----
        api_wrapper = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=200)
        wiki = WikipediaQueryRun(api_wrapper=api_wrapper)

        arxiv_wrapper = ArxivAPIWrapper(top_k_results=1, doc_content_chars_max=200)
        arxiv = ArxivQueryRun(api_wrapper=arxiv_wrapper)

        tools = [wiki, arxiv]
        print("πŸ”Ή Tools initialized:", [type(t).__name__ for t in tools])

        # ---- MODEL ----
        model_name = "mistralai/Mistral-7B-Instruct-v0.2"  # HF PyTorch checkpoint

        print("πŸ”Ή Loading tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(model_name)

        # NOTE(review): the previous `load_in_8bit=True` + device_map="auto"
        # combination requires bitsandbytes AND a CUDA device, so it raises at
        # load time on the CPU-only host the surrounding comments describe.
        # Load normally with low_cpu_mem_usage instead; `trust_remote_code`
        # is dropped because the official Mistral repo needs no custom code
        # and enabling it executes arbitrary repo code.
        print("πŸ”Ή Loading model (CPU)...")
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map="auto",       # place layers automatically (CPU here)
            low_cpu_mem_usage=True,  # stream weights instead of double-allocating
        )
        print("βœ… Model loaded")

        # ---- PIPELINE ----
        # Greedy decoding: with do_sample=False generation is deterministic,
        # so temperature/top_p would be ignored (transformers warns about
        # them) — they are omitted rather than silently dead.
        llm_pipeline = pipeline(
            task="text-generation",
            model=model,
            tokenizer=tokenizer,
            max_new_tokens=256,
            do_sample=False,
            repetition_penalty=1.2,
            eos_token_id=tokenizer.eos_token_id,
            return_full_text=False,  # the agent needs only the completion, not the echoed prompt
        )
        hf_llm = HuggingFacePipeline(pipeline=llm_pipeline)
        print("βœ… Pipeline ready")

        # ---- AGENT ----
        agent = initialize_agent(
            tools=tools,
            llm=hf_llm,
            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
            verbose=True,
            handle_parsing_errors=True,  # recover from malformed ReAct output
        )
        print("βœ… Agent initialized")

        return agent

    except Exception:
        # Keep the module importable even when model loading fails;
        # get_response reports the uninitialized state to the user.
        print("❌ Failed to build QA pipeline")
        traceback.print_exc()
        return None
80
 
81
 
82
# Build the agent once at import time; remains None if initialization failed
# (build_qa catches its own exceptions and returns None instead of raising).
agent = build_qa()
 
 
 
 
84
 
85
 
86
def get_response(query: str) -> str:
    """Answer *query* with the module-level agent.

    Always returns a string: the agent's answer text on success, or a
    human-readable error message when the agent is missing or the run fails.
    """
    if agent is None:
        return "⚠️ QA pipeline not initialized."
    try:
        result = agent.invoke({"input": query})
        # AgentExecutor.invoke returns a dict like {"input": ..., "output": ...};
        # the declared -> str contract needs the answer text, not the raw dict.
        if isinstance(result, dict):
            return str(result.get("output", result))
        return str(result)
    except Exception as e:
        return f"❌ QA run failed: {e}"