mohamedachraf committed on
Commit
fc83ecf
·
1 Parent(s): 7a5d2ae

modify the pipeline

Browse files
Files changed (1) hide show
  1. app.py +28 -19
app.py CHANGED
@@ -19,7 +19,7 @@ from langchain.prompts.prompt import PromptTemplate
19
  from langchain.vectorstores.base import VectorStoreRetriever
20
 
21
  import torch
22
- from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
23
  from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
24
 
25
  from transformers import TextIteratorStreamer
@@ -28,8 +28,10 @@ import os
28
  import tempfile
29
 
30
 
31
- # Prompt template
32
- template = """Context: {context}
 
 
33
 
34
  Question: {question}
35
 
@@ -37,15 +39,16 @@ Answer:"""
37
  QA_PROMPT = PromptTemplate(template=template, input_variables=["question", "context"])
38
 
39
 
40
- # Load Phi-2 model from hugging face hub
41
- model_id = "microsoft/phi-2"
 
 
 
 
42
 
43
- tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
44
- if tokenizer.pad_token is None:
45
- tokenizer.pad_token = tokenizer.eos_token
46
-
47
- model = AutoModelForCausalLM.from_pretrained(
48
- model_id, torch_dtype=torch.float32, trust_remote_code=True
49
  )
50
 
51
  # sentence transformers to be used in vector store
@@ -121,6 +124,12 @@ def get_retrieval_qa_chain(text_file, hf_model):
121
  if text_file != default_text_file or default_text_file is None:
122
  if text_file is not None and os.path.exists(text_file):
123
  retriever, vectorstore = prepare_vector_store_retriever(text_file)
 
 
 
 
 
 
124
 
125
  chain = RetrievalQA.from_chain_type(
126
  llm=hf_model,
@@ -137,15 +146,14 @@ def generate(question, answer, text_file, max_new_tokens):
137
  return
138
 
139
  try:
140
- # Create pipeline without streamer first to test
141
  phi2_pipeline = pipeline(
142
- "text-generation",
143
  model=model,
144
  tokenizer=tokenizer,
145
  max_new_tokens=max_new_tokens,
146
- do_sample=False, # ← greedy
147
- pad_token_id=tokenizer.eos_token_id,
148
- eos_token_id=tokenizer.eos_token_id,
149
  )
150
 
151
  hf_model = HuggingFacePipeline(pipeline=phi2_pipeline)
@@ -192,13 +200,14 @@ def upload_file(file):
192
  with gr.Blocks() as demo:
193
  gr.Markdown(
194
  """
195
- # Retrieval Augmented Generation with Phi-2: Question Answering demo
196
- ### This demo uses the Phi-2 language model and Retrieval Augmented Generation (RAG). It allows you to upload a txt or PDF file and ask the model questions related to the content of that file.
197
  ### Features:
198
  - Support for both PDF and text files
199
  - Retrieval-based question answering using document context
 
200
  ### To get started, upload a text (.txt) or PDF (.pdf) file using the upload button below.
201
- The context size of the Phi-2 model is 2048 tokens, so large documents are automatically split into chunks.
202
  Retrieval Augmented Generation (RAG) enables us to retrieve just the few small chunks of the document that are relevant to your query and inject it into our prompt.
203
  The model is then able to answer questions by incorporating knowledge from the newly provided document.
204
  """
 
19
  from langchain.vectorstores.base import VectorStoreRetriever
20
 
21
  import torch
22
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
23
  from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
24
 
25
  from transformers import TextIteratorStreamer
 
28
  import tempfile
29
 
30
 
31
+ # Prompt template optimized for Flan-T5
32
+ template = """Answer the question based on the context below.
33
+
34
+ Context: {context}
35
 
36
  Question: {question}
37
 
 
39
  QA_PROMPT = PromptTemplate(template=template, input_variables=["question", "context"])
40
 
41
 
42
+ # Load Flan-T5 model from hugging face hub - excellent for CPU and Q&A tasks
43
+ # Alternative popular CPU-friendly models you can try:
44
+ # - "google/flan-t5-small" (faster, smaller)
45
+ # - "google/flan-t5-large" (better quality, slower)
46
+ # - "microsoft/DialoGPT-medium" (conversational)
47
+ model_id = "google/flan-t5-base"
48
 
49
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
50
+ model = AutoModelForSeq2SeqLM.from_pretrained(
51
+ model_id, torch_dtype=torch.float32
 
 
 
52
  )
53
 
54
  # sentence transformers to be used in vector store
 
124
  if text_file != default_text_file or default_text_file is None:
125
  if text_file is not None and os.path.exists(text_file):
126
  retriever, vectorstore = prepare_vector_store_retriever(text_file)
127
+ else:
128
+ # Create a dummy retriever if no file is available
129
+ dummy_doc = Document(page_content="No document loaded. Please upload a file to get started.")
130
+ dummy_vectorstore = FAISS.from_documents([dummy_doc], embeddings)
131
+ retriever = VectorStoreRetriever(vectorstore=dummy_vectorstore, search_kwargs={"k": 1})
132
+ vectorstore = dummy_vectorstore
133
 
134
  chain = RetrievalQA.from_chain_type(
135
  llm=hf_model,
 
146
  return
147
 
148
  try:
149
+ # Create pipeline for text2text generation (Flan-T5)
150
  phi2_pipeline = pipeline(
151
+ "text2text-generation",
152
  model=model,
153
  tokenizer=tokenizer,
154
  max_new_tokens=max_new_tokens,
155
+ do_sample=False,
156
+ return_full_text=False,
 
157
  )
158
 
159
  hf_model = HuggingFacePipeline(pipeline=phi2_pipeline)
 
200
  with gr.Blocks() as demo:
201
  gr.Markdown(
202
  """
203
+ # Retrieval Augmented Generation with Flan-T5: Question Answering demo
204
+ ### This demo uses Google's Flan-T5 language model and Retrieval Augmented Generation (RAG). It allows you to upload a txt or PDF file and ask the model questions related to the content of that file.
205
  ### Features:
206
  - Support for both PDF and text files
207
  - Retrieval-based question answering using document context
208
+ - Optimized for CPU performance using Flan-T5-Base model
209
  ### To get started, upload a text (.txt) or PDF (.pdf) file using the upload button below.
210
+ The Flan-T5 model is efficient and works well on CPU, making it perfect for document Q&A tasks.
211
  Retrieval Augmented Generation (RAG) enables us to retrieve just the few small chunks of the document that are relevant to your query and inject it into our prompt.
212
  The model is then able to answer questions by incorporating knowledge from the newly provided document.
213
  """