sultan-hassan committed on
Commit
2c8124e
·
verified ·
1 Parent(s): c3312f3

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +77 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
from datasets import load_dataset
from dotenv import load_dotenv
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
import gradio as gr

# Load environment variables from a local .env file (a no-op when the file
# is absent, e.g. on hosted Spaces where secrets arrive via the environment).
load_dotenv()

# NOTE(review): the secret is read under the key 'groq_api_keys' — confirm
# this matches the name configured in the deployment's secrets.
groq_key = os.environ.get('groq_api_keys')

# Initialize the chat LLM used to answer questions.
llm = ChatGroq(model="llama-3.1-8b-instant", api_key=groq_key)

# Report the real key status: the original printed "API Key loaded"
# unconditionally, which masked a missing/misnamed secret until the first
# LLM call failed at query time.
if groq_key:
    print("✅ Setup complete. API Key loaded.")
else:
    print("⚠️ Setup complete, but 'groq_api_keys' is not set — LLM calls will fail.")
19
+
20
+
21
+ # Load data from huggingface for astro arxiv papers
22
+ ds = load_dataset("mehnaazasad/arxiv_astro_co_ga")
23
+ data = ds["test"]["abstract"][:20] # take first examples
24
+
25
+
26
+ # 1. Initialize the Embedding Model (Converts text to math)
27
+ embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
28
+
29
+ # 2. Create and Populate Vector Store
30
+ vectorstore = Chroma(
31
+ collection_name="dataset_store",
32
+ embedding_function=embed_model,
33
+ persist_directory="./chroma_db",
34
+ )
35
+
36
+
37
+ vectorstore.add_texts(data)
38
+ retriever = vectorstore.as_retriever()
39
+
40
+
41
+
42
+ print("🧠 Vector Store created. The AI can now 'search' your data.")
43
+
44
+
# Prompt contract: ground the answer in the retrieved context and admit
# ignorance rather than guessing.
template = """You are astronomy expert.
Use the provided context to answer the question.
If you don't know, say you don't know. Explain in detail.
Context: {context}
Question: {question}
Answer:"""

rag_prompt = PromptTemplate.from_template(template)

# Assemble the RAG pipeline: fan the user question out to the retriever
# (context) and a passthrough (question), fill the prompt, query the LLM,
# then reduce the reply to plain text.
_inputs = {"context": retriever, "question": RunnablePassthrough()}
rag_chain = _inputs | rag_prompt | llm | StrOutputParser()

print("⛓️ RAG Chain is ready.")
61
+
62
def rag_memory_stream(text):
    """Stream the RAG answer, yielding the growing reply after each new token."""
    pieces = []
    for token in rag_chain.stream(text):
        pieces.append(token)
        yield "".join(pieces)
67
+
68
# Wire the streaming generator into a simple text-in / text-out Gradio UI.
_examples = [
    'what are the characteristics of blue compact dwarf?',
    'What is cold dark matter?',
]
demo = gr.Interface(
    fn=rag_memory_stream,
    inputs="text",
    outputs="text",
    title="Real-time Astronomy AI Assistant",
    examples=_examples,
)

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
huggingface_hub
langchain_groq
langchain_huggingface
langchain_chroma
langchain_core
gradio
sentence-transformers
datasets
python-dotenv