import os
import traceback

import torch
from huggingface_hub import login
from langchain.agents import initialize_agent, AgentType
from langchain_community.tools import WikipediaQueryRun, ArxivQueryRun
from langchain_community.utilities import WikipediaAPIWrapper, ArxivAPIWrapper
from langchain_huggingface import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, TextGenerationPipeline

# ✅ Log in to Hugging Face
token = os.getenv("HF_TOKEN")
print("🔑 HF_TOKEN available?", token is not None)
if token:
    login(token=token)
else:
    print("❌ No HF_TOKEN found in environment")


def build_qa():
    print("🚀 Starting QA pipeline...")

    # ---- 1. Tools ----
    try:
        print("🔹 Initializing Wikipedia tool...")
        wiki_wrapper = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=200)
        wiki = WikipediaQueryRun(api_wrapper=wiki_wrapper)

        print("🔹 Initializing Arxiv tool...")
        arxiv_wrapper = ArxivAPIWrapper(top_k_results=1, doc_content_chars_max=200)
        arxiv = ArxivQueryRun(api_wrapper=arxiv_wrapper)

        tools = [wiki, arxiv]
        print("✅ Tools initialized")
    except Exception as e:
        print("❌ Tools initialization failed:", e)
        traceback.print_exc()
        return None

    # ---- 2. Model ----
    try:
        print("🔹 Loading Mistral 7B model...")
        model_name = "mistralai/Mistral-7B-Instruct-v0.3"  # or your CPU-quantized version
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map="auto",    # requires `accelerate` to be installed
            dtype=torch.float16,  # newer replacement for the `torch_dtype` kwarg
        )

        llm = TextGenerationPipeline(
            model=model,
            tokenizer=tokenizer,
            max_new_tokens=256,
            do_sample=False,       # greedy decoding; temperature/top_p are ignored when sampling is off
            temperature=0.2,
            top_p=0.9,
            repetition_penalty=1.2,
            eos_token_id=tokenizer.eos_token_id,
            return_full_text=False,
        )
        hf_llm = HuggingFacePipeline(pipeline=llm)
        print(f"✅ Model loaded: {model_name}")
    except Exception as e:
        print("❌ Model load failed:", e)
        traceback.print_exc()
        return None

    # ---- 3. Agent ----
    try:
        print("🔹 Initializing agent...")
        agent = initialize_agent(
            tools=tools,
            llm=hf_llm,
            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
            verbose=True,
            handle_parsing_errors=True,
        )
        print("✅ Agent initialized")
    except Exception as e:
        print("❌ Agent initialization failed:", e)
        traceback.print_exc()
        return None

    print("✅ QA pipeline ready")
    return agent


# ---- Build once ----
try:
    agent = build_qa()
    if agent:
        print("✅ QA pipeline built successfully:", type(agent))
    else:
        print("❌ QA pipeline build returned None")
except Exception as e:
    agent = None
    print("❌ Failed to build QA pipeline:", e)
    traceback.print_exc()


def get_response(user_message, history):
    if agent is None:
        return "⚠️ QA pipeline not initialized."
    try:
        print("💬 User query:", user_message)
        response = agent.invoke({"input": user_message})
        print("🤖 Agent response:", response)
        # agent.invoke returns a dict; surface only the answer text
        return response.get("output", str(response))
    except Exception as e:
        print("❌ Agent execution failed:", e)
        traceback.print_exc()
        return f"❌ QA run failed: {e}"
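

# --- Usage sketch (assumption, not part of the original script) ---
# get_response(user_message, history) matches the fn signature expected by
# Gradio's gr.ChatInterface, so one plausible way to expose the agent as a
# chat UI would be:
#
#   import gradio as gr
#   demo = gr.ChatInterface(fn=get_response, title="Wikipedia/Arxiv QA Agent")
#   demo.launch()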