Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| from openai import AzureOpenAI | |
| from langchain_community.document_loaders import PyPDFLoader | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_community.vectorstores import Chroma | |
| from langchain_community.embeddings import HuggingFaceEmbeddings | |
# --- Build the retrieval index ------------------------------------------
# Read the Tiruvāsagam PDF (path is relative to the working directory).
loader = PyPDFLoader("tiru.pdf")
docs = loader.load()

# Cut the loaded documents into overlapping chunks
# (chunk_size=500, chunk_overlap=50 as passed to the splitter).
splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
)
chunks = splitter.split_documents(docs)

# Multilingual sentence-embedding model loaded through HuggingFaceEmbeddings.
# NOTE(review): this was described as "Tamil capable" — confirm Tamil
# coverage against the model card before relying on retrieval quality.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
)

# Embed the chunks into a Chroma vector store and expose a similarity
# retriever that returns the 3 closest chunks per query.
vectorstore = Chroma.from_documents(chunks, embedding=embedding_model)
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)
# Azure OpenAI client


def _require_env(name):
    """Return the whitespace-stripped value of environment variable *name*.

    Args:
        name: Name of the environment variable to read.

    Returns:
        The variable's value with surrounding whitespace removed.

    Raises:
        RuntimeError: If the variable is unset or blank. Raised here so a
            missing setting is reported by name instead of surfacing as
            ``AttributeError: 'NoneType' object has no attribute 'strip'``.
    """
    value = (os.getenv(name) or "").strip()
    if not value:
        raise RuntimeError(
            f"Required environment variable {name} is not set or is empty."
        )
    return value


# Credentials and endpoint come from the environment; both are mandatory.
client = AzureOpenAI(
    api_key=_require_env("AZURE_OPENAI_API_KEY"),
    api_version="2025-01-01-preview",
    azure_endpoint=_require_env("AZURE_OPENAI_ENDPOINT"),
)
| # Chat function | |
def chat_fn(message, history):
    """Answer one chat message using retrieved passages as context.

    The message is sent to the module-level ``retriever``; the page content
    of the returned documents is joined into a context string and passed,
    together with the question, to the Azure OpenAI chat deployment through
    the module-level ``client``.

    Args:
        message: The user's question text.
        history: Prior conversation turns supplied by the chat UI. It is
            accepted for interface compatibility but not used, so each
            question is answered independently.

    Returns:
        The model's reply text, or an empty string if the response carries
        no text content.
    """
    # `invoke` is the supported retriever entry point; the older
    # `get_relevant_documents` is deprecated in langchain-core.
    retrieved = retriever.invoke(message)
    context = "\n\n".join(doc.page_content for doc in retrieved)

    completion = client.chat.completions.create(
        model="gpt-4.1",  # your Azure deployment name
        messages=[
            {
                "role": "system",
                "content": (
                    "You are a helpful assistant answering only from Tiruvāsagam. "
                    "Always reply in Tamil with simple, clear, and correct grammar. "
                    "If the question is not related to Tiruvāsagam, Lord Shiva, or "
                    "Manikkavasagar, just reply: 'எனக்கு தெரியாது'."
                ),
            },
            {
                "role": "user",
                "content": f"Context:\n{context}\n\nQuestion: {message}",
            },
        ],
        temperature=0.8,
        max_tokens=1000,
    )

    # The SDK types `content` as optional; fall back to "" so the UI always
    # receives a string.
    return completion.choices[0].message.content or ""
# Gradio chat interface: every submitted message is routed to chat_fn.
chatbot = gr.ChatInterface(
    fn=chat_fn,
    description="திருவாசகத்தை அடிப்படையாகக் கொண்டு கேள்விகளை கேளுங்கள் (Tamil/English supported).",
    title="திருவாசகம் RAG Chatbot",
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    launch_options = {
        "server_name": "0.0.0.0",  # listen on all interfaces
        "server_port": 7860,
        "debug": True,
    }
    chatbot.launch(**launch_options)