PraneshJs committed on
Commit
64d7b51
·
verified ·
1 Parent(s): 25269f7

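Fixed embedding function: pass a LangChain HuggingFaceEmbeddings object to Chroma instead of a raw SentenceTransformer encode function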

Browse files
Files changed (1) hide show
  1. app.py +5 -8
app.py CHANGED
@@ -1,10 +1,10 @@
1
  import os
2
  import gradio as gr
3
  from openai import AzureOpenAI
4
- from sentence_transformers import SentenceTransformer
5
  from langchain_community.document_loaders import PyPDFLoader
6
  from langchain_text_splitters import RecursiveCharacterTextSplitter
7
  from langchain_community.vectorstores import Chroma
 
8
 
9
  # Load PDF (Tiruvāsagam)
10
  loader = PyPDFLoader("tiru.pdf")
@@ -15,27 +15,24 @@ splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
15
  chunks = splitter.split_documents(docs)
16
 
17
  # Local embedding model (Tamil capable)
18
- embedding_model = SentenceTransformer("intfloat/multilingual-e5-large")
19
- def embed(texts): return embedding_model.encode(texts, convert_to_numpy=True)
20
 
21
  # Store in Chroma
22
- vectorstore = Chroma.from_documents(chunks, embedding_function=embed)
23
  retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k":3})
24
 
25
  # Azure OpenAI client
26
  client = AzureOpenAI(
27
- api_key=os.getenv("AZURE_OPENAI_API_KEY"),
28
  api_version="2025-01-01-preview",
29
- azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT")
30
  )
31
 
32
  # Chat function
33
  def chat_fn(message, history):
34
- # Retrieve relevant chunks
35
  docs = retriever.get_relevant_documents(message)
36
  context = "\n\n".join([d.page_content for d in docs])
37
 
38
- # Call Azure OpenAI (GPT-4)
39
  completion = client.chat.completions.create(
40
  model="gpt-4.1", # your Azure deployment name
41
  messages=[
 
1
  import os
2
  import gradio as gr
3
  from openai import AzureOpenAI
 
4
  from langchain_community.document_loaders import PyPDFLoader
5
  from langchain_text_splitters import RecursiveCharacterTextSplitter
6
  from langchain_community.vectorstores import Chroma
7
+ from langchain_community.embeddings import HuggingFaceEmbeddings
8
 
9
  # Load PDF (Tiruvāsagam)
10
  loader = PyPDFLoader("tiru.pdf")
 
15
  chunks = splitter.split_documents(docs)
16
 
17
  # Local embedding model (Tamil capable)
18
+ embedding_model = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large")
 
19
 
20
  # Store in Chroma
21
+ vectorstore = Chroma.from_documents(chunks, embedding=embedding_model)
22
  retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k":3})
23
 
24
  # Azure OpenAI client
25
  client = AzureOpenAI(
26
+ api_key=os.getenv("AZURE_OPENAI_API_KEY").strip(),
27
  api_version="2025-01-01-preview",
28
+ azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT").strip()
29
  )
30
 
31
  # Chat function
32
  def chat_fn(message, history):
 
33
  docs = retriever.get_relevant_documents(message)
34
  context = "\n\n".join([d.page_content for d in docs])
35
 
 
36
  completion = client.chat.completions.create(
37
  model="gpt-4.1", # your Azure deployment name
38
  messages=[