File size: 2,757 Bytes
716795c
 
 
 
 
8d8d5a2
 
 
 
 
 
 
716795c
 
 
 
 
 
8d8d5a2
716795c
 
 
 
 
8d8d5a2
716795c
 
8d8d5a2
716795c
 
8d8d5a2
2d6cd36
716795c
8d8d5a2
716795c
 
 
8d8d5a2
716795c
 
cbaa3b3
edcc543
716795c
8d8d5a2
716795c
 
 
 
 
 
 
 
 
576596d
716795c
8d8d5a2
716795c
8d8d5a2
 
 
576596d
8d8d5a2
 
 
716795c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8d8d5a2
716795c
 
 
 
8d8d5a2
 
716795c
 
 
 
 
8d8d5a2
576596d
716795c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import gradio as gr
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser


def build_chain():
    """Assemble and return a small RAG chain over a hard-coded Kragujevac corpus.

    Pipeline: BGE embeddings -> FAISS retriever (top-3) -> prompt ->
    local Qwen2.5-1.5B-Instruct text-generation pipeline -> string output.

    Returns:
        A LangChain runnable; call ``.invoke(question: str)`` to get a string answer.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"

    embed_model_id = "BAAI/bge-small-en-v1.5"
    embeddings = HuggingFaceEmbeddings(
        model_name=embed_model_id,
        model_kwargs={"device": device},
    )

    # Tiny in-memory corpus used as the retrieval knowledge base.
    texts = [
        "Kragujevac is a city in central Serbia founded in the 15th century.",
        "The main industry in Kragujevac includes automotive manufacturing.",
        "Famous landmarks: The Šumarice Memorial Park and the Old Foundry Museum."
    ]

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=80)
    docs = text_splitter.create_documents(texts)

    vectorstore = FAISS.from_documents(docs, embeddings)
    retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

    model_id = "Qwen/Qwen2.5-1.5B-Instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    # Safety: some causal-LM tokenizers ship without a pad token.
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token = tokenizer.eos_token

    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        # float16 is only well-supported on CUDA; fall back to float32 on CPU.
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    )

    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=200,
        temperature=0.7,
        do_sample=True,
        return_full_text=False,  # emit only the generated answer, not the prompt
    )

    llm = HuggingFacePipeline(pipeline=pipe)

    template = """You are a helpful assistant. Use only the provided context to answer.
If unsure, say "I don't know."

Context: {context}

Question: {question}

Answer:"""
    # PromptTemplate, not ChatPromptTemplate: HuggingFacePipeline is a plain
    # text-completion LLM, and a chat template would prepend "Human:" role
    # markers to the string the model actually receives.
    prompt = PromptTemplate.from_template(template)

    def format_docs(docs):
        # Join the retrieved chunks into one context string for the prompt.
        return "\n\n".join(doc.page_content for doc in docs)

    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
    return rag_chain


rag_chain = build_chain()

def answer(question: str):
    """Run the RAG chain on *question*; return an empty string for blank input."""
    cleaned = question.strip()
    if cleaned:
        return rag_chain.invoke(question)
    return ""


# Minimal Gradio UI: one question textbox in, one answer textbox out.
demo = gr.Interface(
    fn=answer,
    inputs=gr.Textbox(lines=2, label="Question"),
    outputs=gr.Textbox(lines=8, label="Answer"),
    title="Mini RAG demo (Kragujevac)"
)

# Launch the local web server only when run as a script, not on import.
if __name__ == "__main__":
    demo.launch()