Pranjal Gupta committed on
Commit
d3935d3
·
1 Parent(s): 60e1ea2

changing model as per huggingFace

Browse files
Files changed (1) hide show
  1. app.py +20 -1
app.py CHANGED
@@ -4,11 +4,14 @@ import time
4
  import chromadb
5
  from langchain_chroma import Chroma
6
  import transformers
 
7
  from langchain_huggingface import HuggingFaceEmbeddings
8
  from langchain_core.prompts import PromptTemplate
9
  from langchain_core.output_parsers import StrOutputParser
10
  from langchain_ollama import ChatOllama
11
  from langchain_core.documents import Document
 
 
12
 
13
  # Initialize in-memory ChromaDB client
14
  # This client runs entirely within the app.py script.
@@ -58,7 +61,23 @@ def using_ollama_model(retriever, query, results, conversation_history):
58
 
59
  doc_texts = "\\n".join([doc.page_content for doc in results])
60
 
61
- llm = ChatOllama(model="llama3.2", temperature=0.4, num_predict=512)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
  rag_chain = template | llm | StrOutputParser()
64
 
 
4
  import chromadb
5
  from langchain_chroma import Chroma
6
  import transformers
7
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
8
  from langchain_huggingface import HuggingFaceEmbeddings
9
  from langchain_core.prompts import PromptTemplate
10
  from langchain_core.output_parsers import StrOutputParser
11
  from langchain_ollama import ChatOllama
12
  from langchain_core.documents import Document
13
+ from langchain_community.llms import HuggingFacePipeline
14
+
15
 
16
  # Initialize in-memory ChromaDB client
17
  # This client runs entirely within the app.py script.
 
61
 
62
  doc_texts = "\\n".join([doc.page_content for doc in results])
63
 
64
+ # llm = ChatOllama(model="llama3.2", temperature=0.4, num_predict=512)
65
+ tokenizer = AutoTokenizer.from_pretrained("llama3.2")
66
+ model = AutoModelForCausalLM.from_pretrained("llama3.2")
67
+ pipe = pipeline(
68
+ "text-generation",
69
+ model=model,
70
+ tokenizer=tokenizer,
71
+ max_new_tokens=256,
72
+ do_sample=True,
73
+ temperature=0.7,
74
+ top_p=0.95,
75
+ repetition_penalty=1.2
76
+ )
77
+
78
+ # Use the pipeline with LangChain's HuggingFacePipeline
79
+ llm = HuggingFacePipeline(pipeline=pipe)
80
+
81
 
82
  rag_chain = template | llm | StrOutputParser()
83