from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import requests
from bs4 import BeautifulSoup
app = FastAPI()
MODEL_NAME = "microsoft/phi-1_5"
print("Loading model...")
torch.set_num_threads(2)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    dtype=torch.float32,
    low_cpu_mem_usage=True,
)
model.to("cpu")
print("Model loaded!")

# -------- REQUEST SCHEMA --------
class RequestData(BaseModel):
    prompt: str
    history: list = []
    use_search: bool = True
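
# Illustrative request body for this schema (not part of the original file);
# "history" uses the same role/content dicts that build_prompt expects below:
# {
#   "prompt": "What is the capital of France?",
#   "history": [
#     {"role": "user", "content": "Hi"},
#     {"role": "assistant", "content": "Hello! How can I help?"}
#   ],
#   "use_search": false
# }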

# -------- ROOT ROUTE --------
@app.get("/")
def home():
    return {"message": "API is running"}

# -------- TOOL 1: SEARCH --------
def search_links(query):
    # Let requests URL-encode the query instead of interpolating it raw.
    url = "https://duckduckgo.com/html/"
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        res = requests.get(url, params={"q": query}, headers=headers, timeout=10)
        soup = BeautifulSoup(res.text, "html.parser")
        links = []
        for a in soup.select("a.result__a"):
            href = a.get("href")
            if href:
                links.append(href)
        return links[:3]
    except Exception:
        return []

# -------- TOOL 2: OPEN PAGE --------
def extract_page_text(url):
    try:
        res = requests.get(url, timeout=10, headers={"User-Agent": "Mozilla/5.0"})
        soup = BeautifulSoup(res.text, "html.parser")
        # Drop script/style blocks so only visible page text remains.
        for tag in soup(["script", "style"]):
            tag.decompose()
        text = soup.get_text(separator=" ")
        return text[:2000]
    except Exception:
        return ""

# -------- TOOL 3: BROWSE --------
def browse_web(query):
    links = search_links(query)
    contents = []
    for link in links:
        page = extract_page_text(link)
        if page:
            contents.append(page)
    return "\n\n".join(contents[:3])

# -------- MEMORY BUILDER (FIXED) --------
def build_prompt(prompt, history):
    convo = ""
    for msg in history:
        if isinstance(msg, dict):
            if msg.get("role") == "user":
                convo += f"User: {msg.get('content')}\n"
            elif msg.get("role") == "assistant":
                convo += f"Assistant: {msg.get('content')}\n"
    convo += f"User: {prompt}\nAssistant:"
    return convo
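
# For reference, build_prompt produces a plain chat transcript like:
#   User: Hi
#   Assistant: Hello! How can I help?
#   User: <new prompt>
#   Assistant:
# The model is expected to continue after the final "Assistant:" line.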

# -------- GENERATION (FIXED OUTPUT) --------
def generate_text(prompt):
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model.generate(
            inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=120,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )
    # Strip the prompt by token count rather than character count: the decoded
    # text does not always reproduce the (possibly truncated) prompt verbatim.
    new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

# -------- AGENT LOOP --------
def agent(prompt, history, use_search=True):
    base_prompt = build_prompt(prompt, history)
    decision_prompt = f"""
You are an AI agent.
User question:
{prompt}
Should you search the web? Answer YES or NO.
"""
    decision = generate_text(decision_prompt).lower()
    if use_search and "yes" in decision:
        web_data = browse_web(prompt)
        final_prompt = f"""
You are an AI assistant with access to web data.
Conversation:
{base_prompt}
Web Data:
{web_data}
Answer clearly and accurately:
"""
    else:
        final_prompt = base_prompt
    return generate_text(final_prompt)

# -------- API ENDPOINT --------
@app.post("/generate")
def generate(data: RequestData):
    response = agent(
        prompt=data.prompt,
        history=data.history,
        use_search=data.use_search,
    )
    return {"response": response}
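
# Example usage (not part of the original file): the Space's own launcher
# normally starts the app, but running this module directly with uvicorn is a
# common way to test it locally. Host, port, and the curl payload below are
# assumptions, not taken from the original repo.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)

# Example request once the server is up:
#   curl -X POST http://localhost:7860/generate \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "Who won the 2022 World Cup?", "use_search": true}'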