"""Gradio chat app: a TinyLlama persona bot ("Krish") served via LangChain."""

import os

import gradio as gr
from huggingface_hub import login
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_community.llms import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Chat model to serve — small enough to run without a dedicated GPU.
model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Log in to the Hugging Face Hub only when a token is configured.
# The previous `os.environ["HUGGINGFACE_TOKEN"]` raised KeyError when the
# variable was unset, even though this model is public and needs no auth.
hf_token = os.environ.get("HUGGINGFACE_TOKEN")
if hf_token:
    login(token=hf_token)

# Load tokenizer and model; "auto" lets transformers pick device placement
# and dtype (GPU + half precision when available, CPU otherwise).
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, device_map="auto", torch_dtype="auto"
)
pipe = pipeline(
    "text-generation", model=model, tokenizer=tokenizer, max_new_tokens=256
)
llm = HuggingFacePipeline(pipeline=pipe)

# Persona prompt: every user message is wrapped in this template so the
# model answers in character as "Krish".
prompt = PromptTemplate.from_template(
    "You are Krish, a wise and witty friend.\n\nUser: {question}\nKrish:"
)
chain = LLMChain(prompt=prompt, llm=llm)


def chat_fn(message: str) -> str:
    """Run one chat turn through the LLM chain.

    Args:
        message: The raw user message.

    Returns:
        The model's reply with surrounding whitespace stripped.
    """
    response = chain.run({"question": message})
    return response.strip()


iface = gr.Interface(
    fn=chat_fn,
    inputs="text",
    outputs="text",
    title="🦚 Meet Krish",
    description="A wise, witty, and compassionate friend - KrishWay",
)

# Launch only when executed as a script, so importing this module
# (e.g. from tests or tooling) does not start the web server.
if __name__ == "__main__":
    iface.launch()