umerforsure committed on
Commit
267a2df
·
1 Parent(s): 9584b5a

πŸ› Fix: improved model output post-processing to avoid empty answers

Browse files
Files changed (1) hide show
  1. app.py +6 -4
app.py CHANGED
@@ -12,13 +12,14 @@ from langchain_community.embeddings import HuggingFaceEmbeddings
12
  from langchain.text_splitter import RecursiveCharacterTextSplitter
13
  from langchain_core.documents import Document
14
 
15
- # Load Reasoning Model (lightweight + CPU friendly)
16
- model_id = "google/flan-t5-base"
 
17
  tokenizer = AutoTokenizer.from_pretrained(model_id)
18
- model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
19
 
20
  reasoning_pipeline = pipeline(
21
- "text2text-generation",
22
  model=model,
23
  tokenizer=tokenizer,
24
  max_new_tokens=512,
@@ -33,6 +34,7 @@ vectorstore = None
33
  # Summarizer
34
  summary_pipeline = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
35
 
 
36
  def clean_text(text):
37
  lines = text.split("\n")
38
  cleaned = []
 
12
  from langchain.text_splitter import RecursiveCharacterTextSplitter
13
  from langchain_core.documents import Document
14
 
15
+ #extra
16
+ # Load Reasoning Model (CPU-friendly with better reasoning)
17
+ model_id = "microsoft/phi-1_5"
18
  tokenizer = AutoTokenizer.from_pretrained(model_id)
19
+ model = AutoModelForCausalLM.from_pretrained(model_id)
20
 
21
  reasoning_pipeline = pipeline(
22
+ "text-generation",
23
  model=model,
24
  tokenizer=tokenizer,
25
  max_new_tokens=512,
 
34
  # Summarizer
35
  summary_pipeline = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
36
 
37
+
38
  def clean_text(text):
39
  lines = text.split("\n")
40
  cleaned = []