"""FastAPI service exposing a small CPU-hosted LLM agent with optional web search.

The agent answers `/generate` requests by optionally scraping DuckDuckGo
results into the prompt before running microsoft/phi-1_5 on CPU.
"""

from fastapi import FastAPI
from pydantic import BaseModel, Field
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import requests
from bs4 import BeautifulSoup

app = FastAPI()

MODEL_NAME = "microsoft/phi-1_5"

print("Loading model...")
torch.set_num_threads(2)  # keep CPU usage bounded on small hosts
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    # NOTE(review): older transformers releases spell this `torch_dtype`;
    # confirm the installed version accepts `dtype`.
    dtype=torch.float32,
    low_cpu_mem_usage=True,
)
model.to("cpu")
print("Model loaded!")


# -------- REQUEST SCHEMA --------
class RequestData(BaseModel):
    """Body of a POST /generate request."""

    prompt: str
    # default_factory avoids sharing one mutable list across requests
    history: list = Field(default_factory=list)
    use_search: bool = True


# -------- ROOT ROUTE --------
@app.get("/")
def home():
    """Health-check endpoint."""
    return {"message": "API is running"}


# -------- TOOL 1: SEARCH --------
def search_links(query):
    """Return up to 3 result URLs from DuckDuckGo's HTML endpoint.

    Best-effort: returns [] on any network or parse failure.
    """
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        # `params` URL-encodes the query; the original f-string URL broke
        # on spaces and special characters.
        res = requests.get(
            "https://duckduckgo.com/html/",
            params={"q": query},
            headers=headers,
            timeout=10,
        )
        soup = BeautifulSoup(res.text, "html.parser")
        links = []
        for anchor in soup.select("a.result__a"):
            href = anchor.get("href")
            if href:
                links.append(href)
        return links[:3]
    except Exception:  # broad on purpose (best-effort tool), but not bare:
        return []      # a bare except would also swallow KeyboardInterrupt


# -------- TOOL 2: OPEN PAGE --------
def extract_page_text(url):
    """Fetch *url* and return up to 2000 chars of visible text ("" on failure)."""
    try:
        res = requests.get(url, timeout=10, headers={"User-Agent": "Mozilla/5.0"})
        soup = BeautifulSoup(res.text, "html.parser")
        # Drop non-visible content before extracting text.
        for tag in soup(["script", "style"]):
            tag.decompose()
        text = soup.get_text(separator=" ")
        return text[:2000]
    except Exception:  # best-effort tool; see search_links
        return ""


# -------- TOOL 3: BROWSE --------
def browse_web(query):
    """Search for *query* and concatenate the text of up to 3 result pages."""
    links = search_links(query)
    contents = []
    for link in links:
        page = extract_page_text(link)
        if page:
            contents.append(page)
    return "\n\n".join(contents[:3])


# -------- MEMORY BUILDER --------
def build_prompt(prompt, history):
    """Render *history* (list of {"role", "content"} dicts) plus the new
    user *prompt* into a "User:/Assistant:" transcript ending at
    "Assistant:" so the model continues from there. Non-dict entries and
    unknown roles are silently skipped.
    """
    convo = ""
    for msg in history:
        if isinstance(msg, dict):
            if msg.get("role") == "user":
                convo += f"User: {msg.get('content')}\n"
            elif msg.get("role") == "assistant":
                convo += f"Assistant: {msg.get('content')}\n"
    convo += f"User: {prompt}\nAssistant:"
    return convo


# -------- GENERATION --------
def generate_text(prompt):
    """Run one sampled completion and return only the newly generated text."""
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model.generate(
            inputs["input_ids"],
            attention_mask=inputs["attention_mask"],  # explicit mask: silences warning, correct padding semantics
            max_new_tokens=120,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )
    # Strip the prompt at the *token* level. Slicing the decoded string by
    # len(prompt) (the old approach) breaks whenever truncation shortened
    # the prompt, because the decoded text then starts with a shorter prefix.
    new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()


# -------- AGENT LOOP --------
def agent(prompt, history, use_search=True):
    """Answer *prompt*: optionally let the model decide to browse the web,
    then generate a reply conditioned on the conversation (and web data).
    """
    base_prompt = build_prompt(prompt, history)

    if use_search:
        # Only pay for the decision call when search is actually allowed
        # (the original ran it unconditionally and discarded the result).
        decision_prompt = f""" You are an AI agent. User question: {prompt} Should you search the web? Answer YES or NO. """
        decision = generate_text(decision_prompt).lower()
    else:
        decision = ""

    if use_search and "yes" in decision:
        web_data = browse_web(prompt)
        final_prompt = f""" You are an AI assistant with access to web data. Conversation: {base_prompt} Web Data: {web_data} Answer clearly and accurately: """
    else:
        final_prompt = base_prompt

    return generate_text(final_prompt)


# -------- API ENDPOINT --------
@app.post("/generate")
def generate(data: RequestData):
    """POST /generate — run the agent and return {"response": ...}."""
    response = agent(
        prompt=data.prompt,
        history=data.history,
        use_search=data.use_search,
    )
    return {"response": response}