Zynara committed on
Commit
ffb81a0
·
verified ·
1 Parent(s): 6c4f151

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +370 -96
main.py CHANGED
@@ -1,137 +1,411 @@
1
 
 
 
 
 
2
  import torch
3
- from fastapi import FastAPI
4
- from pydantic import BaseModel
5
- from duckduckgo_search import ddg
6
- import chromadb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  from sentence_transformers import SentenceTransformer
8
- from transformers import AutoTokenizer, AutoModelForCausalLM
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  # ===============================
11
- # 1️⃣ Load Model (Llama-3-8B-Instruct)
12
  # ===============================
13
- MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
 
14
 
15
  print("🚀 Loading Billy AI model...")
16
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
17
- model = AutoModelForCausalLM.from_pretrained(
18
- MODEL_ID,
19
- torch_dtype=torch.float32, # CPU-friendly
20
- device_map="auto"
21
- )
22
 
23
- def generate_text(prompt: str, max_tokens: int = 512) -> str:
24
- inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
25
- output = model.generate(
26
- **inputs,
27
- max_new_tokens=max_tokens,
28
- do_sample=True,
29
- temperature=0.7,
30
- top_p=0.9
31
- )
32
- return tokenizer.decode(output[0], skip_special_tokens=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
  # ===============================
35
- # 2️⃣ Setup RAG (Memory + Search)
36
  # ===============================
37
- db = chromadb.PersistentClient(path="./billy_rag_db")
38
  try:
39
- collection = db.get_collection("billy_rag")
40
- except:
41
- collection = db.create_collection("billy_rag")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
- embedder = SentenceTransformer("all-MiniLM-L6-v2")
 
44
 
45
- def search_web(query: str):
 
46
  try:
47
- results = ddg(query, max_results=3)
48
- return [r.get("body") or r.get("snippet") or "" for r in results if r]
49
- except:
50
- return []
 
 
 
 
 
 
 
 
 
51
 
52
- def store_knowledge(text: str):
53
- vec = embedder.encode(text).tolist()
54
  try:
55
- collection.add(documents=[text], embeddings=[vec], ids=[str(abs(hash(text)))])
56
- except:
 
 
 
 
 
 
 
 
 
57
  pass
58
 
59
- def retrieve_knowledge(query: str) -> str:
60
- vec = embedder.encode(query).tolist()
61
- results = collection.query(query_embeddings=[vec], n_results=3)
62
- return " ".join(results["documents"][0]) if results and results["documents"] else ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
  # ===============================
65
- # 3️⃣ Tool Functions
66
  # ===============================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  def summarize_text(text: str) -> str:
68
- prompt = f"Summarize the following text in simple terms:\n\n{text}\n\nSummary:"
69
- return generate_text(prompt, max_tokens=200)
 
 
70
 
71
  def translate_text(text: str, lang: str) -> str:
72
- prompt = f"Translate the following text to {lang}:\n\n{text}\n\nTranslation:"
73
- return generate_text(prompt, max_tokens=200)
 
 
74
 
75
  def explain_code(code: str) -> str:
76
- prompt = f"Explain the following code in simple terms:\n\n```{code}```\n\nExplanation:"
77
- return generate_text(prompt, max_tokens=300)
 
 
 
 
78
 
79
  # ===============================
80
- # 4️⃣ FastAPI App
81
  # ===============================
82
- app = FastAPI(title="Billy AI - Free Chatbot")
 
 
 
 
 
 
83
 
84
- class Query(BaseModel):
85
- message: str
86
- user_id: str = "anonymous"
 
 
87
 
88
- @app.post("/chat")
89
- def chat(req: Query):
90
- user_msg = req.message.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
- # --- Special Commands ---
93
- if user_msg.lower().startswith("/summarize "):
94
- return {"response": summarize_text(user_msg[11:])}
 
95
 
96
- if user_msg.lower().startswith("/translate "):
97
- try:
98
- lang, text = user_msg[10:].split(" ", 1)
99
- return {"response": translate_text(text, lang)}
100
- except:
101
- return {"response": "Format: /translate <language> <text>"}
102
-
103
- if user_msg.lower().startswith("/explaincode "):
104
- return {"response": explain_code(user_msg[13:])}
105
-
106
- # --- Search & RAG ---
107
- local_knowledge = retrieve_knowledge(user_msg)
108
-
109
- if not local_knowledge:
110
- web_results = search_web(user_msg)
111
- for r in web_results:
112
- if r.strip():
113
- store_knowledge(r)
114
- local_knowledge = " ".join(web_results)
115
-
116
- # --- Personality & Context ---
117
- context = (
118
- "You are Billy AI — a helpful, witty, and slightly funny AI assistant. "
119
- "You are a bit smarter than GPT-3.5, but not too advanced. "
120
- "When answering, be friendly, concise, and give useful info. "
121
- f"Use this info if helpful: {local_knowledge}\n\n"
122
- f"User: {user_msg}\nAssistant:"
123
- )
124
 
125
- reply = generate_text(context)
126
- return {"response": reply.strip()}
 
127
 
128
- @app.get("/")
129
- def home():
130
- return {"message": "Billy AI is running and ready to chat!"}
131
 
132
  # ===============================
133
- # 5️⃣ Run
134
  # ===============================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  if __name__ == "__main__":
136
- import uvicorn
137
- uvicorn.run(app, host="0.0.0.0", port=8000)
 
1
 
2
import hashlib
import os  # BUGFIX: `os.getenv` is used below (MODEL_ID / HF_TOKEN) but `os` was never imported
import time
from typing import List, Dict, Any, Tuple, Optional

import torch
import gradio as gr

# Optional deps (web search + vector store)
ddg = None
DDGS = None
try:
    # Legacy module-level function (older duckduckgo_search releases)
    from duckduckgo_search import ddg as _ddg
    ddg = _ddg
except Exception:
    try:
        # Newer client-style API
        from duckduckgo_search import DDGS as _DDGS
        DDGS = _DDGS
    except Exception:
        ddg = None
        DDGS = None

try:
    import chromadb
except Exception:
    chromadb = None

from sentence_transformers import SentenceTransformer

from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
)

# Optional quantization (4-bit on GPU)
BITSANDBYTES_AVAILABLE = False
try:
    from transformers import BitsAndBytesConfig
    BITSANDBYTES_AVAILABLE = True
except Exception:
    BITSANDBYTES_AVAILABLE = False
 
43
# ===============================
# 1) Model Setup (Llama-3.1-8B-Instruct)
# ===============================
# NOTE(review): these calls require `import os` at module top — verify it is present.
# MODEL_ID / HF_TOKEN are overridable via environment variables.
MODEL_ID = os.getenv("MODEL_ID", "meta-llama/Meta-Llama-3.1-8B-Instruct")
HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN")

print("🚀 Loading Billy AI model...")

# Tokenizer: newer transformers takes `token=`, older takes `use_auth_token=`;
# a TypeError on the first form triggers the legacy fallback.
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)
except TypeError:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_auth_token=HF_TOKEN)

if tokenizer.pad_token_id is None:
    # Fallback to eos as pad if not set
    tokenizer.pad_token_id = tokenizer.eos_token_id
60
+
61
+ def _gpu_bf16_supported() -> bool:
62
+ try:
63
+ return torch.cuda.is_available() and torch.cuda.is_bf16_supported()
64
+ except Exception:
65
+ return False
66
+
67
+ def _model_device(m) -> torch.device:
68
+ try:
69
+ return next(m.parameters()).device
70
+ except Exception:
71
+ return torch.device("cpu")
72
+
73
# Build from_pretrained kwargs by hardware tier: 4-bit NF4 when a GPU and
# bitsandbytes are available, plain half precision on GPU otherwise, fp32 on CPU.
load_kwargs: Dict[str, Any] = {}
if torch.cuda.is_available():
    if BITSANDBYTES_AVAILABLE:
        print("⚙️ Using 4-bit quantization (bitsandbytes).")
        compute_dtype = torch.bfloat16 if _gpu_bf16_supported() else torch.float16
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=compute_dtype,
        )
        load_kwargs.update(dict(device_map="auto", quantization_config=bnb_config, token=HF_TOKEN))
    else:
        print("⚙️ No bitsandbytes: loading in half precision on GPU.")
        load_kwargs.update(dict(device_map="auto",
                                torch_dtype=torch.bfloat16 if _gpu_bf16_supported() else torch.float16,
                                token=HF_TOKEN))
else:
    print("⚠️ No GPU detected: CPU load (slow). Consider a smaller model or enable GPU runtime.")
    load_kwargs.update(dict(torch_dtype=torch.float32, token=HF_TOKEN))

# Load model with fallbacks for auth kwarg differences:
# try `token=` first, then no auth kwarg, then legacy `use_auth_token=`.
try:
    model = AutoModelForCausalLM.from_pretrained(MODEL_ID, **load_kwargs)
except TypeError:
    load_kwargs.pop("token", None)
    try:
        model = AutoModelForCausalLM.from_pretrained(MODEL_ID, **load_kwargs)
    except TypeError:
        model = AutoModelForCausalLM.from_pretrained(MODEL_ID, use_auth_token=HF_TOKEN, **load_kwargs)

# Cache the resolved device once; generate_text() moves inputs to it.
MODEL_DEVICE = _model_device(model)
print(f"✅ Model loaded on: {MODEL_DEVICE}")
105
 
106
# ===============================
# 2) Lightweight RAG (Embeddings + Optional Chroma + In-Memory Fallback)
# ===============================
# The embedder is mandatory (RAG cannot work without it); Chroma is optional.
try:
    embedder = SentenceTransformer("all-MiniLM-L6-v2")
    print("✅ Embedding model loaded.")
except Exception as e:
    raise RuntimeError(f"Embedding model load failed: {e}")

# Optional Chroma persistent store; fallback to in-memory store if unavailable.
chroma_client = None
collection = None
if chromadb is not None:
    try:
        chroma_client = chromadb.PersistentClient(path="./billy_rag_db")
        try:
            # Reuse the collection when it already exists on disk.
            collection = chroma_client.get_collection("billy_rag")
        except Exception:
            collection = chroma_client.create_collection("billy_rag")
        print("✅ ChromaDB ready.")
    except Exception as e:
        # `collection` stays None; store/retrieve fall back to memory_store.
        print(f"⚠️ ChromaDB init failed: {e}; falling back to in-memory store.")

# In-memory store: list of dicts {text, embedding}
memory_store: List[Dict[str, Any]] = []
131
 
132
+ def _stable_id(text: str) -> str:
133
+ return hashlib.sha1(text.encode("utf-8")).hexdigest()
134
 
135
def search_web(query: str, max_results: int = 3) -> List[str]:
    """Return up to *max_results* DuckDuckGo result snippets; [] when search is unavailable.

    Tries the legacy module-level ``ddg()`` function first, then the newer
    ``DDGS`` client. Both paths are best-effort: any failure falls through.
    """
    # Try legacy ddg function
    try:
        if ddg is not None:
            try:
                results = ddg(query, max_results=max_results)
            except TypeError:
                # Releases disagree on positional vs keyword query argument.
                results = ddg(keywords=query, max_results=max_results)
            snippets = []
            for r in results or []:
                if not r:
                    continue
                snippets.append(r.get("body") or r.get("snippet") or r.get("title") or "")
            return [s for s in snippets if s and s.strip()]
    except Exception:
        pass  # fall through to the DDGS client

    # Try modern DDGS client
    try:
        if DDGS is not None:
            with DDGS() as d:
                results = list(d.text(query, max_results=max_results))
            snippets = []
            for r in results or []:
                if not r:
                    continue
                # r keys differ slightly in DDGS()
                snippets.append(r.get("body") or r.get("snippet") or r.get("title") or r.get("href") or "")
            return [s for s in snippets if s and s.strip()]
    except Exception:
        pass

    return []
168
+
169
def store_knowledge(text: str):
    """Embed *text* and persist it — Chroma when available, else the in-memory store.

    Best-effort: blank text or an embedding failure is silently skipped.
    """
    if not text or not text.strip():
        return
    try:
        vec = embedder.encode(text).tolist()
    except Exception:
        return  # embedding failed; skip rather than crash the ingest path
    if collection is not None:
        try:
            collection.add(
                documents=[text],
                embeddings=[vec],
                ids=[_stable_id(text)],  # content-addressed id dedupes identical texts
                metadatas=[{"source": "web_or_local"}],
            )
            return
        except Exception:
            pass  # Chroma write failed; use the in-memory fallback below
    # Fallback: in-memory
    memory_store.append({"text": text, "embedding": vec})
189
+
190
+ def _cosine(a: List[float], b: List[float]) -> float:
191
+ s = 0.0
192
+ na = 0.0
193
+ nb = 0.0
194
+ for x, y in zip(a, b):
195
+ s += x * y
196
+ na += x * x
197
+ nb += y * y
198
+ na = na ** 0.5 or 1.0
199
+ nb = nb ** 0.5 or 1.0
200
+ return s / (na * nb)
201
+
202
def retrieve_knowledge(query: str, k: int = 5) -> str:
    """Return up to *k* stored documents relevant to *query*, space-joined.

    Prefers the Chroma collection when available; otherwise ranks the
    in-memory store by cosine similarity. Returns "" when nothing is stored
    or the query cannot be embedded.
    """
    try:
        qvec = embedder.encode(query).tolist()
    except Exception:
        return ""
    # Prefer Chroma if available
    if collection is not None:
        try:
            res = collection.query(query_embeddings=[qvec], n_results=k)
            docs = res.get("documents", [])
            if docs and docs[0]:
                return " ".join(docs[0])
        except Exception:
            pass  # Chroma query failed; fall back to the in-memory scan
    # In-memory cosine top-k
    if not memory_store:
        return ""
    scored: List[Tuple[str, float]] = []
    for item in memory_store:
        scored.append((item["text"], _cosine(qvec, item["embedding"])))
    scored.sort(key=lambda x: x[1], reverse=True)
    return " ".join([t for t, _ in scored[:k]])
224
 
225
  # ===============================
226
+ # 3) Generation Utilities
227
  # ===============================
228
def build_messages(system_prompt: str, chat_history: List[Tuple[str, str]], user_prompt: str) -> List[Dict[str, str]]:
    """Assemble an OpenAI-style message list.

    Layout: one system message, then the (user, assistant) history turns in
    order (empty sides skipped), then the new user prompt.
    """
    history_msgs: List[Dict[str, str]] = []
    for user_text, assistant_text in chat_history or []:
        if user_text:
            history_msgs.append({"role": "user", "content": user_text})
        if assistant_text:
            history_msgs.append({"role": "assistant", "content": assistant_text})
    return (
        [{"role": "system", "content": system_prompt}]
        + history_msgs
        + [{"role": "user", "content": user_prompt}]
    )
238
+
239
def apply_chat_template_from_messages(messages: List[Dict[str, str]]) -> str:
    """Render *messages* to a prompt string using the tokenizer's chat template.

    When the tokenizer has no template, falls back to a minimal
    "system\n\nUser: ...\nAssistant:" layout built from the LAST system and
    user messages only (history is dropped in the fallback).
    """
    try:
        return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    except Exception:
        # Fallback to simple instruct style if no template provided
        sys = ""  # NOTE: local name shadows the stdlib `sys` module; harmless here
        user = ""
        # Extract the last system and user message for a minimal fallback
        for m in messages:
            if m["role"] == "system":
                sys = m["content"]
            elif m["role"] == "user":
                user = m["content"]
        sys = (sys or "").strip()
        user = (user or "").strip()
        prefix = f"{sys}\n\n" if sys else ""
        return f"{prefix}User: {user}\nAssistant:"
256
+
257
def _get_eos_token_id():
    """Normalize ``tokenizer.eos_token_id`` to a single id.

    Some tokenizers expose a list of eos ids; use the first. Returns the raw
    attribute (possibly None) otherwise.
    """
    eos = getattr(tokenizer, "eos_token_id", None)
    if isinstance(eos, list):
        return eos[0] if eos else eos
    return eos
262
+
263
def generate_text(prompt_text: str,
                  max_tokens: int = 600,
                  temperature: float = 0.6,
                  top_p: float = 0.9) -> str:
    """Sample a completion for *prompt_text* and return it without the prompt echo.

    Args:
        prompt_text: Fully rendered prompt (already chat-templated).
        max_tokens: Upper bound on new tokens (hard-capped at 2048).
        temperature: Sampling temperature.
        top_p: Nucleus-sampling cutoff.
    """
    inputs = tokenizer(prompt_text, return_tensors="pt")
    # Move every input tensor to the model's device before generation.
    inputs = {k: v.to(MODEL_DEVICE) for k, v in inputs.items()}
    output_ids = model.generate(
        **inputs,
        max_new_tokens=min(max_tokens, 2048),
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=_get_eos_token_id(),
    )
    text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    # Best-effort: strip the prompt echo if present
    if text.startswith(prompt_text):
        return text[len(prompt_text):].strip()
    return text.strip()
283
+
284
def summarize_text(text: str) -> str:
    """Summarize *text* into bullet points via the chat model (low temperature for fidelity)."""
    system = "You are Billy AI a precise, helpful summarizer."
    user = f"Summarize the following text in simple, clear bullet points (max 6 bullets):\n\n{text}"
    messages = build_messages(system, [], user)
    return generate_text(apply_chat_template_from_messages(messages), max_tokens=220, temperature=0.3, top_p=0.9)
289
 
290
def translate_text(text: str, lang: str) -> str:
    """Translate *text* into *lang* via the chat model, preserving meaning and tone."""
    system = "You are Billy AI an expert translator."
    user = f"Translate the following text to {lang} while preserving meaning and tone:\n\n{text}"
    messages = build_messages(system, [], user)
    return generate_text(apply_chat_template_from_messages(messages), max_tokens=220, temperature=0.3, top_p=0.9)
295
 
296
def explain_code(code: str) -> str:
    """Explain *code* step by step for a mid-level developer via the chat model."""
    system = "You are Billy AI an expert software engineer and teacher."
    user = ("Explain the following code step by step for a mid-level developer. "
            "Include what it does, complexity, pitfalls, and an improved version if relevant.\n\n"
            f"{code}")
    messages = build_messages(system, [], user)
    return generate_text(apply_chat_template_from_messages(messages), max_tokens=400, temperature=0.5, top_p=0.9)
303
 
304
  # ===============================
305
+ # 4) Chat Orchestration
306
  # ===============================
307
def make_system_prompt(local_knowledge: str) -> str:
    """Build Billy's system prompt, appending at most 3000 chars of retrieved context."""
    persona = (
        "You are Billy AI — a helpful, witty, and precise assistant. "
        "You tend to outperform GPT-3.5 on reasoning, explanation, and coding tasks. "
        "Be concise but thorough; use bullet points for clarity; cite assumptions; avoid hallucinations."
    )
    if not local_knowledge:
        return persona
    # Truncate context so a huge retrieval cannot swamp the prompt budget.
    return persona + f"\nUseful context: {local_knowledge[:3000]}"
314
 
315
def _ingest_search(query: str, max_results: int = 3) -> int:
    """Web-search *query*, store every snippet in the knowledge base, return the count stored."""
    snips = search_web(query, max_results=max_results)
    for s in snips:
        store_knowledge(s)
    return len(snips)
320
 
321
+ def _parse_translate_command(cmd: str) -> Tuple[Optional[str], Optional[str]]:
322
+ # Supports patterns:
323
+ # /translate <lang>: <text>
324
+ # /translate <lang> | <text>
325
+ # /translate <lang> <text>
326
+ rest = cmd[len("/translate"):].strip()
327
+ if not rest:
328
+ return None, None
329
+ # Try separators
330
+ for sep in [":", "|"]:
331
+ if sep in rest:
332
+ lang, text = rest.split(sep, 1)
333
+ return lang.strip(), text.strip()
334
+ parts = rest.split(None, 1)
335
+ if len(parts) == 2:
336
+ return parts[0].strip(), parts[1].strip()
337
+ return None, None
338
 
339
def handle_message(message: str, chat_history: List[Tuple[str, str]]) -> str:
    """Route one user message: slash commands first, otherwise RAG-augmented chat.

    Args:
        message: Raw user input (may start with a slash command).
        chat_history: Prior (user, assistant) turns, oldest first.

    Returns:
        The assistant's reply, or a usage hint for malformed commands.
    """
    msg = (message or "").strip()
    if not msg:
        return "Please send a non-empty message."

    # Slash commands. BUGFIX: slice `msg` (the stripped text the prefix was
    # matched against), not the raw `message` — leading whitespace previously
    # misaligned the payload slices for /explain, /search, /remember and
    # /translate.
    low = msg.lower()
    if low.startswith("/summarize "):
        return summarize_text(msg[len("/summarize "):].strip() or "Nothing to summarize.")
    if low.startswith("/explain "):
        return explain_code(msg[len("/explain "):].strip())
    if low.startswith("/translate"):
        lang, txt = _parse_translate_command(msg)
        if not lang or not txt:
            return "Usage: /translate <lang>: <text>"
        return translate_text(txt, lang)
    if low.startswith("/search "):
        q = msg[len("/search "):].strip()
        if not q:
            return "Usage: /search <query>"
        n = _ingest_search(q, max_results=5)
        ctx = retrieve_knowledge(q, k=5)
        if n == 0 and not ctx:
            return "No results found or web search unavailable."
        return f"Ingested {n} snippet(s). Context now includes:\n\n{ctx[:1000]}"

    if low.startswith("/remember "):
        t = msg[len("/remember "):].strip()
        if not t:
            return "Usage: /remember <text>"
        store_knowledge(t)
        return "Saved to knowledge base."

    # No command: retrieve related knowledge and answer with the LLM.
    local_knowledge = retrieve_knowledge(msg, k=5)
    system_prompt = make_system_prompt(local_knowledge)

    messages = build_messages(system_prompt, chat_history, msg)
    prompt = apply_chat_template_from_messages(messages)
    return generate_text(prompt, max_tokens=600, temperature=0.6, top_p=0.9)
379
 
380
  # ===============================
381
+ # 5) Gradio UI
382
  # ===============================
383
def respond(message, history):
    """Gradio callback: normalize chat history to (user, assistant) string tuples and delegate.

    Any exception from the pipeline is caught and surfaced as an error string
    so the UI never crashes.
    """
    normalized: List[Tuple[str, str]] = []
    for turn in history or []:
        if not isinstance(turn, (list, tuple)) or len(turn) != 2:
            continue  # skip malformed history entries
        user_part, assistant_part = turn
        normalized.append((
            str(user_part) if user_part is not None else "",
            str(assistant_part) if assistant_part is not None else "",
        ))
    try:
        return handle_message(message, normalized)
    except Exception as e:
        return f"Error: {e}"
396
+
397
# Build the Gradio app: a header, a command cheat-sheet, and a chat widget
# backed by `respond`.
with gr.Blocks(title="Billy AI") as demo:
    gr.Markdown("## Billy AI")
    gr.Markdown(
        "Commands: /summarize <text>, /explain <code>, /translate <lang>: <text>, /search <query>, /remember <text>"
    )
    chat = gr.ChatInterface(
        fn=respond,
        title="Billy AI",
        theme="soft",
        cache_examples=False,
    )

if __name__ == "__main__":
    # Share=False by default; set to True if you want a public link
    demo.launch()