Pranjal Gupta committed
Commit d3935d3
Parent(s): 60e1ea2
changing model as per huggingFace
app.py
CHANGED
@@ -4,11 +4,14 @@ import time
 import chromadb
 from langchain_chroma import Chroma
 import transformers
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_core.prompts import PromptTemplate
 from langchain_core.output_parsers import StrOutputParser
 from langchain_ollama import ChatOllama
 from langchain_core.documents import Document
+from langchain_community.llms import HuggingFacePipeline
+

 # Initialize in-memory ChromaDB client
 # This client runs entirely within the app.py script.
@@ -58,7 +61,23 @@ def using_ollama_model(retriever, query, results, conversation_history):

     doc_texts = "\n".join([doc.page_content for doc in results])

-    llm = ChatOllama(model="llama3.2", temperature=0.4, num_predict=512)
+    # llm = ChatOllama(model="llama3.2", temperature=0.4, num_predict=512)
+    tokenizer = AutoTokenizer.from_pretrained("llama3.2")
+    model = AutoModelForCausalLM.from_pretrained("llama3.2")
+    pipe = pipeline(
+        "text-generation",
+        model=model,
+        tokenizer=tokenizer,
+        max_new_tokens=256,
+        do_sample=True,
+        temperature=0.7,
+        top_p=0.95,
+        repetition_penalty=1.2
+    )
+
+    # Use the pipeline with LangChain's HuggingFacePipeline
+    llm = HuggingFacePipeline(pipeline=pipe)
+

     rag_chain = template | llm | StrOutputParser()

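Note on replaying this commit: "llama3.2" is an Ollama model tag, not a Hugging Face Hub repo ID, so AutoTokenizer.from_pretrained("llama3.2") and AutoModelForCausalLM.from_pretrained("llama3.2") will fail at startup with a repository-not-found error. Below is a minimal sketch of the same wiring, assuming a Hub checkpoint such as meta-llama/Llama-3.2-1B-Instruct (an assumption, not taken from the commit, and a gated repo; any causal-LM repo ID you have access to works) and the HuggingFacePipeline that now ships in langchain_huggingface rather than langchain_community:

from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain_huggingface import HuggingFacePipeline  # successor to langchain_community.llms.HuggingFacePipeline

# Assumption: a real Hub repo ID; the commit's "llama3.2" only resolves inside Ollama.
MODEL_ID = "meta-llama/Llama-3.2-1B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID)

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
    repetition_penalty=1.2,
    return_full_text=False,  # return only the completion, not the echoed prompt
)

llm = HuggingFacePipeline(pipeline=pipe)

Two behavioral differences from ChatOllama worth keeping in mind: HuggingFacePipeline is a plain text-completion LLM, so the rendered PromptTemplate string is fed to the model verbatim with no chat template applied, and without return_full_text=False the text-generation pipeline prepends the prompt to its output, which would leak into whatever StrOutputParser hands downstream.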