sultan-hassan committed on
Commit
a74b363
·
verified ·
1 Parent(s): 482d5b7

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -0
app.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+ from datasets import load_dataset
4
+ from dotenv import load_dotenv
5
+ from langchain_groq import ChatGroq
6
+ from langchain_huggingface import HuggingFaceEmbeddings
7
+ from langchain_chroma import Chroma
8
+ from langchain_core.prompts import PromptTemplate
9
+ from langchain_core.output_parsers import StrOutputParser
10
+ from langchain_core.runnables import RunnablePassthrough
11
+ import gradio as gr
12
# Load environment variables (e.g. the Groq API key) from a local .env file.
load_dotenv()

# Accept both the project's historical variable name and the conventional
# GROQ_API_KEY, so existing .env files keep working while new deployments
# can use the standard name.
groq_key = os.environ.get('groq_api_keys') or os.environ.get('GROQ_API_KEY')
if not groq_key:
    # Fail fast with a clear message instead of a cryptic auth error later.
    raise RuntimeError(
        "Missing Groq API key: set 'groq_api_keys' or 'GROQ_API_KEY' in the environment or .env file."
    )

# Initialize the chat LLM used to answer questions.
llm = ChatGroq(model="llama-3.3-70b-versatile", api_key=groq_key)

print("✅ Setup complete. API Key loaded.")
# Load astro arXiv paper abstracts (cosmology + galaxies split) from the
# Hugging Face Hub.
ds = load_dataset("mehnaazasad/arxiv_astro_co_ga")

# Number of abstracts to index; kept small so startup stays fast.
NUM_ABSTRACTS = 50
data = ds["test"]["abstract"][:NUM_ABSTRACTS]
# 1. Initialize the Embedding Model (converts text to dense vectors).
embed_model = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1")

# 2. Create (or reopen) the persistent vector store.
vectorstore = Chroma(
    collection_name="dataset_store",
    embedding_function=embed_model,
    persist_directory="./chroma_db",
)

# Use stable, index-based ids so restarting the app upserts the same
# records instead of appending duplicate copies to the persisted
# collection (the original unconditionally re-added all texts each run).
vectorstore.add_texts(data, ids=[str(i) for i in range(len(data))])
retriever = vectorstore.as_retriever()

print("🧠 Vector Store created. The AI can now 'search' your data.")
# Prompt contract: answer strictly from the retrieved context and admit
# ignorance when the context does not cover the question.
template = """You are astronomy expert.
Use the provided context to answer the question.
If you don't know, say you don't know. Explain in detail.
Context: {context}
Question: {question}
Answer:"""

rag_prompt = PromptTemplate.from_template(template)

# Wire retrieval -> prompt -> LLM -> plain-string output (LCEL pipeline).
chain_inputs = {"context": retriever, "question": RunnablePassthrough()}
rag_chain = chain_inputs | rag_prompt | llm | StrOutputParser()

print("⛓️ RAG Chain is ready.")
def rag_memory_stream(text):
    """Stream the RAG answer, yielding the cumulative response so far.

    Gradio renders each yielded value, so the UI shows the answer
    growing token by token.
    """
    chunks = []
    for piece in rag_chain.stream(text):
        chunks.append(piece)
        yield "".join(chunks)
# Gradio UI: simple text-in / streaming-text-out interface around the chain.
sample_questions = [
    'what are the characteristics of blue compact dwarf?',
    'What is cold dark matter?',
]

demo = gr.Interface(
    fn=rag_memory_stream,
    inputs="text",
    outputs="text",
    title="Real-time Astronomy AI Assistant",
    examples=sample_questions,
)

demo.launch()