Spaces:
Sleeping
Sleeping
added app.py to hf space
Browse files
app.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os

import gradio as gr
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from openai import AzureOpenAI
from sentence_transformers import SentenceTransformer
|
| 8 |
+
|
| 9 |
+
# Load the Tiruvāsagam PDF and split it into overlapping chunks for retrieval.
pdf_loader = PyPDFLoader("tiru.pdf")
docs = pdf_loader.load()

# 500-char chunks with 50-char overlap keep verse context across boundaries.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = text_splitter.split_documents(docs)
| 16 |
+
|
| 17 |
+
# Local multilingual embedding model (covers Tamil).
embedding_model = SentenceTransformer("intfloat/multilingual-e5-large")


def embed(texts):
    """Encode *texts* with the local sentence-transformer.

    Returns the embeddings as a numpy array (``convert_to_numpy=True``).
    """
    return embedding_model.encode(texts, convert_to_numpy=True)
| 20 |
+
|
| 21 |
+
# Store the chunks in Chroma and expose a top-3 similarity retriever.
# FIX: LangChain's Chroma.from_documents expects an `embedding` object that
# implements the Embeddings interface — it has no `embedding_function=`
# keyword taking a bare callable, so the original call failed at runtime.
# HuggingFaceEmbeddings wraps the same sentence-transformers model.
_embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large")
vectorstore = Chroma.from_documents(chunks, embedding=_embeddings)
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})
| 24 |
+
|
| 25 |
+
# Azure OpenAI client, configured entirely from environment variables
# (AZURE_OPENAI_API_KEY / AZURE_OPENAI_ENDPOINT are set as Space secrets).
client = AzureOpenAI(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version="2025-01-01-preview",
)
|
| 31 |
+
|
| 32 |
+
# Chat function
def chat_fn(message, history):
    """Answer a question about the Tiruvāsagam via retrieval-augmented generation.

    Parameters
    ----------
    message : str
        The user's question.
    history : list
        Prior chat turns supplied by ``gr.ChatInterface`` (unused — the model
        only sees the current question plus the retrieved context).

    Returns
    -------
    str
        The model's answer text.
    """
    # Retrieve the top-k chunks relevant to the question.
    docs = retriever.get_relevant_documents(message)
    context = "\n\n".join(d.page_content for d in docs)

    # Call Azure OpenAI (GPT-4)
    completion = client.chat.completions.create(
        model="gpt-4.1",  # your Azure deployment name
        messages=[
            {"role": "system", "content": "You are a helpful assistant answering from Tiruvāsagam.Message in tamil maximum the tamil should be basic without any spelling and grammer mistakes in it. Be as token effecient. Mainly for any question not related to Thiruvasagam or lord Shiva or Author manichavasagar just reply I have no knowlage or I don't know i tamil use only tamil i say it again"},
            {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {message}"},
        ],
        temperature=0.8,
        # FIX: the API parameter is `max_tokens`; the original `max_token=500`
        # raised TypeError on every call.
        max_tokens=500,
    )
    return completion.choices[0].message.content
|
| 49 |
+
|
| 50 |
+
# Gradio chat UI wired to the RAG answer function.
chatbot = gr.ChatInterface(
    chat_fn,
    title="திருவாசகம் RAG Chatbot",
    description="திருவாசகத்தை அடிப்படையாகக் கொண்டு கேள்விகளை கேளுங்கள் (Tamil/English supported).",
)
|
| 56 |
+
|
| 57 |
+
# Entry point: serve the app on all interfaces (HF Spaces expects port 7860).
if __name__ == "__main__":
    chatbot.launch(
        server_name="0.0.0.0",
        server_port=7860,
        debug=True,
    )
|