Nolsafan committed on
Commit
716795c
·
verified ·
1 Parent(s): d22daa7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -49
app.py CHANGED
@@ -1,57 +1,60 @@
1
- from langchain_community.llms import HuggingFacePipeline
 
 
 
 
2
  from langchain_community.vectorstores import FAISS
3
  from langchain_text_splitters import RecursiveCharacterTextSplitter
4
  from langchain_core.prompts import ChatPromptTemplate
5
  from langchain_core.runnables import RunnablePassthrough
6
  from langchain_core.output_parsers import StrOutputParser
7
- from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
8
- import torch
9
-
10
-
11
- embed_model_id = "BAAI/bge-small-en-v1.5"
12
- embeddings = HuggingFaceEmbeddings(
13
- model_name=embed_model_id,
14
- model_kwargs={"device": "cuda" if torch.cuda.is_available() else "cpu"}
15
- )
16
-
17
 
18
- texts = [
19
- "Kragujevac is a city in central Serbia founded in the 15th century.",
20
- "The main industry in Kragujevac includes automotive manufacturing.",
21
- "Famous landmarks: The Šumarice Memorial Park and the Old Foundry Museum."
22
- ]
23
 
 
 
 
 
 
 
24
 
25
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=80)
26
- docs = text_splitter.create_documents(texts)
 
 
 
27
 
 
 
28
 
29
- vectorstore = FAISS.from_documents(docs, embeddings)
30
- retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
31
 
 
 
32
 
33
- model_id = "Qwen/Qwen2.5-0.5B-Instruct"
 
 
34
 
35
- tokenizer = AutoTokenizer.from_pretrained(model_id)
 
 
 
 
36
 
37
- model = AutoModelForCausalLM.from_pretrained(
38
- model_id,
39
- device_map="cpu",
40
- torch_dtype=torch.float32
41
- )
42
-
43
- pipe = pipeline(
44
- "text-generation",
45
- model=model,
46
- tokenizer=tokenizer,
47
- max_new_tokens=200,
48
- temperature=0.7,
49
- do_sample=True
50
- )
51
 
52
- llm = HuggingFacePipeline(pipeline=pipe)
53
 
54
- template = """You are a helpful assistant. Use only the provided context to answer.
55
  If unsure, say "I don't know."
56
 
57
  Context: {context}
@@ -59,19 +62,34 @@ Context: {context}
59
  Question: {question}
60
 
61
  Answer:"""
62
- prompt = ChatPromptTemplate.from_template(template)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
 
 
 
 
64
 
65
- def format_docs(docs):
66
- return "\n\n".join(doc.page_content for doc in docs)
67
 
68
- rag_chain = (
69
- {"context": retriever | format_docs, "question": RunnablePassthrough()}
70
- | prompt
71
- | llm
72
- | StrOutputParser()
73
  )
74
 
75
- question = "What are some landmarks in Kragujevac?"
76
- print("Question:", question)
77
- print("Answer:", rag_chain.invoke(question))
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
4
+
5
+ from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
6
  from langchain_community.vectorstores import FAISS
7
  from langchain_text_splitters import RecursiveCharacterTextSplitter
8
  from langchain_core.prompts import ChatPromptTemplate
9
  from langchain_core.runnables import RunnablePassthrough
10
  from langchain_core.output_parsers import StrOutputParser
 
 
 
 
 
 
 
 
 
 
11
 
 
 
 
 
 
12
 
13
# Built-in demo corpus used when the caller does not supply its own texts.
_DEFAULT_TEXTS = (
    "Kragujevac is a city in central Serbia founded in the 15th century.",
    "The main industry in Kragujevac includes automotive manufacturing.",
    "Famous landmarks: The Šumarice Memorial Park and the Old Foundry Museum.",
)

# Prompt sent to the LLM; {context} and {question} are filled in by the chain.
_RAG_TEMPLATE = """You are a helpful assistant. Use only the provided context to answer.
If unsure, say "I don't know."

Context: {context}

Question: {question}

Answer:"""


def _format_docs(docs):
    """Join the page content of the retrieved documents with blank lines."""
    return "\n\n".join(doc.page_content for doc in docs)


def _build_retriever(texts, embed_model_id, k):
    """Embed *texts*, index them in FAISS and return a top-*k* retriever."""
    embeddings = HuggingFaceEmbeddings(
        model_name=embed_model_id,
        # Run the embedding model on the GPU when one is visible, else on CPU.
        model_kwargs={"device": "cuda" if torch.cuda.is_available() else "cpu"},
    )

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=80)
    docs = text_splitter.create_documents(texts)

    vectorstore = FAISS.from_documents(docs, embeddings)
    return vectorstore.as_retriever(search_kwargs={"k": k})


def _build_llm(model_id):
    """Load *model_id* and wrap a sampling text-generation pipeline for LangChain."""
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    # Safety net: if the tokenizer defines no pad token, reuse the EOS token.
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token = tokenizer.eos_token

    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="cpu",  # change to "auto" when running on a GPU Space
        torch_dtype=torch.float32,
    )

    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=200,
        temperature=0.7,
        do_sample=True,
        # Ask the pipeline for the completion only, without the echoed prompt.
        return_full_text=False,
    )
    return HuggingFacePipeline(pipeline=pipe)


def build_chain(
    *,
    texts=None,
    embed_model_id="BAAI/bge-small-en-v1.5",
    model_id="Qwen/Qwen2.5-0.5B-Instruct",
    k=3,
):
    """Build the retrieval-augmented generation chain used by the demo.

    Called with no arguments it behaves exactly as before: the built-in
    Kragujevac corpus is embedded, indexed in FAISS, and wired to a small
    instruction-tuned model through a fixed prompt.

    Args:
        texts: Iterable of source strings to index. ``None`` selects the
            built-in demo corpus.
        embed_model_id: Hugging Face id of the sentence-embedding model.
        model_id: Hugging Face id of the causal language model.
        k: Number of chunks the retriever returns per question.

    Returns:
        A LangChain runnable that takes a question string and, via
        ``StrOutputParser``, yields the model's answer as a string.

    Raises:
        ValueError: If *texts* is given but contains no entries.
    """
    corpus = list(_DEFAULT_TEXTS if texts is None else texts)
    if not corpus:
        # Fail early with a clear message instead of an obscure index error
        # from the vector store when there is nothing to embed.
        raise ValueError("texts must contain at least one document")

    retriever = _build_retriever(corpus, embed_model_id, k)
    llm = _build_llm(model_id)
    prompt = ChatPromptTemplate.from_template(_RAG_TEMPLATE)

    # The question is passed through unchanged and also used to fetch context.
    return (
        {"context": retriever | _format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
77
+
78
+
79
+ rag_chain = build_chain()
80
 
81
def answer(question: str) -> str:
    """Answer *question* with the module-level RAG chain.

    Args:
        question: The user's question as typed into the UI. ``None`` is
            tolerated and treated like an empty question.

    Returns:
        The chain's answer, or an empty string when the question is missing
        or consists only of whitespace (the chain is not invoked then).
    """
    # Treat None like an empty question instead of letting .strip() raise
    # AttributeError.
    if question is None or not question.strip():
        return ""
    return rag_chain.invoke(question)
85
 
 
 
86
 
87
# Gradio front end: a two-line question box feeding `answer`, whose result is
# shown in an eight-line answer box.
demo = gr.Interface(
    title="Mini RAG demo (Kragujevac)",
    fn=answer,
    inputs=gr.Textbox(label="Question", lines=2),
    outputs=gr.Textbox(label="Answer", lines=8),
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()