Spaces:
Sleeping
Sleeping
| from fastapi import FastAPI | |
| from pydantic import BaseModel | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
| import torch | |
| import requests | |
| from bs4 import BeautifulSoup | |
# -------- APP + MODEL SETUP --------
app = FastAPI()

# Small causal LM; phi-1.5 — served on CPU below.
MODEL_NAME = "microsoft/phi-1_5"

print("Loading model...")
# Cap intra-op CPU threads — presumably a small shared host; TODO confirm.
torch.set_num_threads(2)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# NOTE(review): `dtype=` is the newer transformers kwarg; older versions
# expect `torch_dtype=` — confirm against the pinned transformers version.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    dtype=torch.float32,
    low_cpu_mem_usage=True
)
model.to("cpu")
print("Model loaded!")
# -------- REQUEST SCHEMA --------
class RequestData(BaseModel):
    """Request payload for the generation endpoint."""

    # Latest user message to answer.
    prompt: str
    # Prior conversation turns; items are expected to be dicts with
    # "role"/"content" keys (see build_prompt) — TODO confirm with clients.
    history: list = []
    # When False, the agent never browses the web.
    use_search: bool = True
# -------- ROOT ROUTE --------
def home():
    """Health-check handler reporting that the service is up.

    NOTE(review): no ``@app.get("/")`` decorator is visible on this
    handler — it may have been lost in a paste; confirm the route is
    actually registered with the app.
    """
    status = {"message": "API is running"}
    return status
# -------- TOOL 1: SEARCH --------
def search_links(query):
    """Return up to 3 result URLs from DuckDuckGo's HTML endpoint.

    Args:
        query: Free-text search query; passed via ``params`` so requests
            URL-encodes it (the original f-string interpolation left
            spaces and special characters unencoded).

    Returns:
        List of up to three href strings; empty list on any network error
        (search is best-effort — the agent falls back to no web data).
    """
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        res = requests.get(
            "https://duckduckgo.com/html/",
            params={"q": query},
            headers=headers,
            timeout=10,
        )
        # Treat HTTP error pages (4xx/5xx) the same as a failed request.
        res.raise_for_status()
    except requests.RequestException:
        # Narrowed from a bare `except:` that silently swallowed everything
        # (including KeyboardInterrupt).
        return []
    soup = BeautifulSoup(res.text, "html.parser")
    links = [href for a in soup.select("a.result__a") if (href := a.get("href"))]
    return links[:3]
# -------- TOOL 2: OPEN PAGE --------
def extract_page_text(url):
    """Fetch ``url`` and return up to 2000 chars of its visible text.

    Script/style contents are removed before extraction. Returns "" on
    any network/HTTP error so callers can simply skip the page.
    """
    try:
        res = requests.get(
            url, timeout=10, headers={"User-Agent": "Mozilla/5.0"}
        )
        # Treat HTTP error pages (4xx/5xx) as a failed fetch.
        res.raise_for_status()
    except requests.RequestException:
        # Narrowed from a bare `except:` that silently swallowed everything.
        return ""
    soup = BeautifulSoup(res.text, "html.parser")
    # Drop non-visible content before extracting text.
    for tag in soup(["script", "style"]):
        tag.decompose()
    # Cap the excerpt so downstream prompts stay small.
    return soup.get_text(separator=" ")[:2000]
# -------- TOOL 3: BROWSE --------
def browse_web(query):
    """Search for ``query`` and return the top result pages' text.

    Fetches each search-result URL, keeps non-empty extractions, and
    joins at most three of them with blank lines.
    """
    texts = (extract_page_text(url) for url in search_links(query))
    pages = [text for text in texts if text]
    return "\n\n".join(pages[:3])
# -------- MEMORY BUILDER (FIXED) --------
def build_prompt(prompt, history):
    """Render the conversation history plus the new user turn as a prompt.

    History items that are dicts with role "user" or "assistant" become
    "User: ..." / "Assistant: ..." lines; any other item is ignored.
    The result ends with "Assistant:" so the model continues as the
    assistant.
    """
    role_labels = {"user": "User", "assistant": "Assistant"}
    lines = []
    for msg in history:
        if not isinstance(msg, dict):
            continue
        label = role_labels.get(msg.get("role"))
        if label is not None:
            lines.append(f"{label}: {msg.get('content')}\n")
    lines.append(f"User: {prompt}\nAssistant:")
    return "".join(lines)
# -------- GENERATION (FIXED OUTPUT) --------
def generate_text(prompt):
    """Run the model on ``prompt`` and return only the newly generated text.

    The prompt is truncated to 512 tokens; up to 120 new tokens are
    sampled at temperature 0.7. Returns the stripped completion string.
    """
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model.generate(
            inputs["input_ids"],
            # Pass the mask explicitly and pin pad_token_id to avoid
            # transformers' runtime warnings / wrong-padding behavior.
            attention_mask=inputs["attention_mask"],
            max_new_tokens=120,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )
    # Slice off the prompt in TOKEN space, then decode. The original
    # sliced the decoded string by len(prompt), which breaks whenever the
    # prompt was truncated at 512 tokens or the tokenizer does not decode
    # the prompt back byte-for-byte (it then returned garbage offsets).
    prompt_len = inputs["input_ids"].shape[-1]
    new_tokens = outputs[0][prompt_len:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
# -------- AGENT LOOP --------
def agent(prompt, history, use_search=True):
    """Answer ``prompt``, optionally augmenting with web search results.

    When ``use_search`` is True, a first model call decides (YES/NO)
    whether to browse; on YES, the top search results' text is inlined
    into the final prompt. Returns the generated answer string.
    """
    base_prompt = build_prompt(prompt, history)
    use_web = False
    if use_search:
        # Only spend a model call on the YES/NO decision when searching is
        # allowed at all — the original ran this generation unconditionally
        # and then discarded it whenever use_search was False.
        decision_prompt = f"""
You are an AI agent.
User question:
{prompt}
Should you search the web? Answer YES or NO.
"""
        use_web = "yes" in generate_text(decision_prompt).lower()
    if use_web:
        web_data = browse_web(prompt)
        final_prompt = f"""
You are an AI assistant with access to web data.
Conversation:
{base_prompt}
Web Data:
{web_data}
Answer clearly and accurately:
"""
    else:
        final_prompt = base_prompt
    return generate_text(final_prompt)
# -------- API ENDPOINT --------
def generate(data: RequestData):
    """Handle a generation request and wrap the agent's answer.

    NOTE(review): no ``@app.post(...)`` decorator is visible on this
    handler — it may have been lost in a paste; confirm the route is
    actually registered with the app.
    """
    answer = agent(data.prompt, data.history, use_search=data.use_search)
    return {"response": answer}