PraneshJs committed on
Commit
2a078d1
·
verified ·
1 Parent(s): 8d19e5d

added app.py to hf space

Browse files
Files changed (1) hide show
  1. app.py +58 -0
app.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ from openai import AzureOpenAI
4
+ from sentence_transformers import SentenceTransformer
5
+ from langchain.document_loaders import PyPDFLoader
6
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ from langchain.vectorstores import Chroma
8
+
9
# Read the Tiruvāsagam source PDF from the working directory.
loader = PyPDFLoader("tiru.pdf")
docs = loader.load()

# Break the loaded documents into 500-character chunks that overlap by 50
# characters, so text cut at a boundary still appears whole in one chunk.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
)
chunks = splitter.split_documents(docs)

# Embedding model that runs locally (multilingual, chosen for Tamil text).
embedding_model = SentenceTransformer("intfloat/multilingual-e5-large")
19
def embed(texts):
    """Encode ``texts`` with the module-level ``embedding_model``.

    Args:
        texts: Input passed straight through to ``embedding_model.encode``.

    Returns:
        The result of ``encode`` with NumPy output requested.
    """
    vectors = embedding_model.encode(texts, convert_to_numpy=True)
    return vectors
20
+
21
# Store in Chroma
class _E5Embeddings:
    """Adapter that lets the vector store use the module-level ``embedding_model``.

    The LangChain vector store calls ``embed_documents`` / ``embed_query`` on
    its embedder, so a bare function is not enough. E5 models are trained with
    "passage: " / "query: " input prefixes, so each side adds its own prefix.
    """

    def embed_documents(self, texts):
        """Return one embedding (list of floats) per passage in ``texts``."""
        prefixed = [f"passage: {text}" for text in texts]
        return embedding_model.encode(prefixed, convert_to_numpy=True).tolist()

    def embed_query(self, text):
        """Return the embedding (list of floats) for a single search query."""
        vectors = embedding_model.encode([f"query: {text}"], convert_to_numpy=True)
        return vectors[0].tolist()


# `from_documents` takes the embedder through the `embedding` parameter.
# Passing `embedding_function=` as an extra keyword collides with the keyword
# the wrapper itself sets when it builds the store.
vectorstore = Chroma.from_documents(chunks, embedding=_E5Embeddings())

# Plain similarity search returning the 3 closest chunks per query.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)
24
+
25
# Azure OpenAI client. The key and endpoint are read from the environment
# (None when a variable is unset); the API version is pinned here.
client = AzureOpenAI(
    azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
    api_key=os.environ.get("AZURE_OPENAI_API_KEY"),
    api_version="2025-01-01-preview",
)
31
+
32
# Chat function
def chat_fn(message, history):
    """Answer one user question using retrieved passages as context.

    Args:
        message: The user's latest message. Used both as the retrieval query
            and as the question sent to the model.
        history: Earlier conversation turns supplied by the chat UI. Unused:
            every question is answered independently.

    Returns:
        The text of the first completion choice, or an empty string when the
        model returned no text content.
    """
    # Retrieve the relevant chunks and merge them into one context string.
    passages = retriever.get_relevant_documents(message)
    context = "\n\n".join(passage.page_content for passage in passages)

    # Call Azure OpenAI with the retrieved context placed ahead of the question.
    completion = client.chat.completions.create(
        model="gpt-4.1",  # your Azure deployment name
        messages=[
            {"role": "system", "content": "You are a helpful assistant answering from Tiruvāsagam.Message in tamil maximum the tamil should be basic without any spelling and grammer mistakes in it. Be as token effecient. Mainly for any question not related to Thiruvasagam or lord Shiva or Author manichavasagar just reply I have no knowlage or I don't know i tamil use only tamil i say it again"},
            {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {message}"},
        ],
        temperature=0.8,
        # The keyword is `max_tokens`. The previous `max_token` is not a
        # parameter of `create` and made every call fail with TypeError.
        max_tokens=500,
    )

    # `content` may be None (for example when no text is produced), so hand
    # the UI a string either way.
    return completion.choices[0].message.content or ""
49
+
50
# Gradio chat interface: each submitted message is routed to `chat_fn`.
chatbot = gr.ChatInterface(
    fn=chat_fn,
    description="திருவாசகத்தை அடிப்படையாகக் கொண்டு கேள்விகளை கேளுங்கள் (Tamil/English supported).",
    title="திருவாசகம் RAG Chatbot",
)
56
+
57
# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    chatbot.launch(
        server_name="0.0.0.0",  # listen on all network interfaces
        server_port=7860,
        debug=True,
    )