# NOTE(review): stripped non-code UI capture artifacts ("Spaces:" / "Running")
# that were pasted in above the imports — they are not Python.
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain_openai import OpenAIEmbeddings | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_pinecone import PineconeVectorStore | |
| from langchain_core.documents import Document | |
| from langchain_openai import ChatOpenAI | |
| from langchain_core.output_parsers import StrOutputParser | |
| from langchain_core.prompts import PromptTemplate | |
| from uuid import uuid4 | |
| from prompt import * | |
| from pydantic import BaseModel, Field | |
| from dotenv import load_dotenv | |
| import os | |
| from langchain_core.tools import tool | |
| import unicodedata | |
load_dotenv()

# Pinecone index name read from the environment; None if INDEX_NAME is unset.
index_name = os.environ.get("INDEX_NAME")

# Global initialization: one shared embedding model and vector store for the module.
embedding_model = "text-embedding-3-small"
embedding = OpenAIEmbeddings(model=embedding_model)
# BUG FIX: `index=` expects a pinecone.Index object; a string index *name*
# must be passed as `index_name=` per the langchain_pinecone API.
vector_store = PineconeVectorStore(index_name=index_name, embedding=embedding)
def get_vectorstore(text_chunk, index, title, model="text-embedding-3-small"):
    """Embed one text chunk and upsert it into a Pinecone index.

    Args:
        text_chunk: raw text to store as a single Document.
        index: forwarded to PineconeVectorStore(index=...).
            NOTE(review): that parameter expects a pinecone.Index object —
            confirm callers are not passing a plain name string.
        title: stored in the document metadata and used as the id prefix.
        model: OpenAI embedding model name.

    Returns:
        {"filename_id": <generated id>} on success, False on any error
        (the exception is printed, not re-raised).
    """
    try:
        embedder = OpenAIEmbeddings(model=model)
        print("loaded embedding")
        store = PineconeVectorStore(index=index, embedding=embedder)
        print("loaded vector store")
        doc = Document(page_content=text_chunk, metadata={"title": title})
        print("loaded document")
        doc_id = f"{title}_{uuid4()}"
        store.add_documents(documents=[doc], ids=[doc_id])
        print("added document")
        return {"filename_id": doc_id}
    except Exception as e:
        print(e)
        return False
def retreive_context(query: str, index: str, model="text-embedding-3-small", vector_store=None):
    """Retrieve up to 3 documents relevant to `query` from `vector_store`.

    Uses similarity search with a 0.5 score threshold. Returns the list of
    matching documents, or False if retrieval raises (the error is printed).

    NOTE(review): `index` and `model` are currently unused — lookup goes
    entirely through the supplied `vector_store`; confirm before removing.
    (Function name keeps the original "retreive" spelling for callers.)
    """
    try:
        scored_retriever = vector_store.as_retriever(
            search_type="similarity_score_threshold",
            search_kwargs={"k": 3, "score_threshold": 0.5},
        )
        return scored_retriever.invoke(query)
    except Exception as err:
        print(err)
        return False
| llm = ChatOpenAI(model="gpt-4o-mini", max_tokens=300, temperature=0.5) | |
def generate_stream(query: str, messages=None, model="gpt-4o-mini", max_tokens=300,
                    temperature=0.5, index_name="", stream=True, vector_store=None):
    """Answer `query` with retrieval-augmented generation.

    Retrieves context from `vector_store`, fills the module-level `template`
    prompt with context/history/query, and runs it through the shared `llm`.

    Args:
        query: user question.
        messages: prior chat history passed to the prompt (default: empty).
        model, max_tokens, temperature: NOTE(review) currently unused — the
            module-level `llm` is used as-is; confirm whether per-call
            overrides were intended before wiring them in.
        index_name: forwarded to retreive_context (also unused there).
        stream: if True return a generator of str chunks, else a full str.
        vector_store: store used for retrieval.

    Returns:
        A str generator (stream=True), a str (stream=False), or False on error.
    """
    try:
        # BUG FIX: avoid the shared mutable default argument `messages=[]`.
        history = [] if messages is None else messages
        print("init chat")
        print("init template")
        prompt = PromptTemplate.from_template(template)  # `template` comes from `prompt` module
        print("retreiving context")
        context = retreive_context(query=query, index=index_name, vector_store=vector_store)
        print(f"Context: {context}")
        llm_chain = prompt | llm | StrOutputParser()
        print("streaming")
        payload = {"context": context, "history": history, "query": query}
        if stream:
            return llm_chain.stream(payload)
        # BUG FIX: the non-stream path previously called llm.invoke(query),
        # discarding the retrieved context, history, and prompt template that
        # were just built. Use the same chain so both paths behave alike.
        return llm_chain.invoke(payload)
    except Exception as e:
        print(e)
        return False