abid-ai committed on
Commit
79941ec
·
verified ·
1 Parent(s): 423b7af

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +144 -0
app.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gdown
3
+ import time
4
+ import gradio as gr
5
+
6
+ # Modern Imports
7
+ from langchain_community.document_loaders import PyPDFLoader
8
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
9
+ from langchain_huggingface import HuggingFaceEmbeddings
10
+ from langchain_community.vectorstores import FAISS
11
+ from langchain_groq import ChatGroq
12
+ from langchain_core.prompts import ChatPromptTemplate
13
+ from langchain_core.runnables import RunnablePassthrough
14
+ from langchain_core.output_parsers import StrOutputParser
15
+
16
# ==========================================
# 1. SETUP & KEYS
# ==========================================
# On Hugging Face, set GROQ_API_KEY in the "Settings" tab under "Secrets".
# The key is only checked here and never copied back into os.environ:
# os.environ values must be str, so `os.environ[k] = os.getenv(k)` raises
# TypeError whenever the secret is missing.
if not os.getenv("GROQ_API_KEY"):
    print("WARNING: GROQ_API_KEY is not set; add it under Settings > Secrets.")

# Assembly Language and Data Structures Knowledge Base
links_to_process = [
    "https://drive.google.com/file/d/1rb7AeJZrDNR-bq8Q9V4IvtzYZsDOvDH0/view?usp=sharing",
    "https://drive.google.com/file/d/16PcJo_JaQHh1bx01lCAkc4QwQ6YnLb-K/view?usp=sharing",
]

# Local folder that receives the downloaded knowledge-base files.
output_dir = 'knowledge_base'
# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(output_dir, exist_ok=True)
31
+
32
+ # ==========================================
33
+ # 2. IMPROVED DOWNLOAD LOGIC
34
+ # ==========================================
35
def build_vector_db(links):
    """Download the knowledge-base PDFs and index them in a FAISS store.

    Args:
        links: Google Drive share URLs. A URL containing ``/folders/`` is
            downloaded as a folder; any other URL is downloaded as one file.

    Returns:
        A FAISS vector store built from the chunked PDF pages, or ``None``
        when no PDF under ``output_dir`` could be loaded.
    """
    print("📥 Syncing Computer Science Knowledge Base...")
    os.makedirs(output_dir, exist_ok=True)

    for link in links:
        try:
            if "/folders/" in link:
                gdown.download_folder(url=link, output=output_dir, quiet=True, use_cookies=False)
            else:
                # Share links have the form .../file/d/<id>/view; gdown only
                # extracts the file id from that form when fuzzy=True.
                # Otherwise it saves the HTML viewer page instead of the PDF.
                # A trailing separator tells gdown that output is a directory.
                gdown.download(
                    url=link,
                    output=os.path.join(output_dir, ""),
                    quiet=True,
                    fuzzy=True,
                )
            # NOTE(review): presumably a courtesy pause between Drive requests.
            time.sleep(1)
        except Exception as e:
            # Best effort: a failed link must not stop the remaining downloads.
            print(f"⚠️ Skip Link: {e}")

    all_docs = []
    for root, _dirs, files in os.walk(output_dir):
        for filename in files:
            if not filename.lower().endswith(".pdf"):
                continue
            file_path = os.path.join(root, filename)
            try:
                all_docs.extend(PyPDFLoader(file_path).load())
            except Exception as e:
                # Keep indexing the other files, but report the failure
                # instead of swallowing it silently.
                print(f"⚠️ Skip PDF {file_path}: {e}")

    if not all_docs:
        return None

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
    chunks = text_splitter.split_documents(all_docs)
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return FAISS.from_documents(chunks, embeddings)
66
+
67
# Initialize: build the index once at import time. The retriever is asked
# for k=3 results per query and stays None when no index could be built.
vector_store = build_vector_db(links_to_process)
retriever = vector_store.as_retriever(search_kwargs={"k": 3}) if vector_store else None
73
+
74
+ # ==========================================
75
+ # 3. MODERN RAG CHAIN
76
+ # ==========================================
77
llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0)

template = """You are a CS Professor's assistant. Answer the question about Assembly Language or Data Structures using the provided context.
If the answer is not in the context, say you don't know based on current documents.

Context:
{context}

Question: {question}

Expert Answer:"""

prompt = ChatPromptTemplate.from_template(template)


def _format_docs(docs):
    """Join the text of the retrieved documents into one context string."""
    return "\n\n".join(doc.page_content for doc in docs)


if retriever is not None:
    rag_chain = (
        # The retriever yields Document objects; without the formatter the
        # prompt would receive the list's repr (metadata, escaped newlines)
        # instead of the passage text.
        {"context": retriever | _format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
else:
    # No knowledge base was loaded, so there is nothing to chain against.
    rag_chain = None
100
+
101
+ # ==========================================
102
+ # 4. FRONTEND
103
+ # ==========================================
104
# Stylesheet handed to gr.Blocks: one rule per line, matching the
# elem_id / elem_classes values used in the layout.
custom_css = (
    "\n"
    "#main-container { max-width: 900px; margin: auto; padding: 20px; }\n"
    ".header-text { text-align: center; color: #2563eb; margin-bottom: 2px; }\n"
    ".report-box { background-color: #f8fafc; border-radius: 12px; border: 1px solid #e2e8f0; padding: 20px; }\n"
)
109
+
110
def process_query(query):
    """Answer a question with the RAG chain and return text for display.

    Args:
        query: The question typed by the user. ``None``, empty, and
            whitespace-only values are treated as "no question".

    Returns:
        The chain's answer, or a message describing why no answer could be
        produced (knowledge base missing, empty question, or chain error).
    """
    if rag_chain is None:
        return "❌ Error: Knowledge Base not loaded. Check Google Drive links."
    # Guard against None as well as blank input so .strip() cannot raise.
    if not query or not query.strip():
        return "### ⚠️ Please enter a question regarding Assembly or Data Structures."
    try:
        return rag_chain.invoke(query)
    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing the app.
        return f"### ❌ Error\n{e}"
119
+
120
# Page layout: a centred column with the question box, action buttons and
# the Markdown area the answer is rendered into.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue"), css=custom_css) as demo:
    with gr.Column(elem_id="main-container"):
        gr.Markdown("# 🏛️ ByteWise: CS Fundamental Intelligence", elem_classes="header-text")
        gr.Markdown("<p style='text-align: center;'>Specialized RAG for Assembly Language & Data Structures</p>")
        gr.HTML("<hr>")

        user_input = gr.Textbox(
            label="Inquiry",
            placeholder="e.g., Explain the stack operations in Assembly vs Linked List implementation...",
            lines=3
        )

        with gr.Row():
            submit_btn = gr.Button("CONSULT KNOWLEDGE BASE", variant="primary", scale=2)
            # Only the question box is registered with the clear button.
            clear_btn = gr.ClearButton([user_input], value="CLEAR", scale=1)

        gr.Markdown("### 📋 Academic Intelligence Report")
        with gr.Column(elem_classes="report-box"):
            output_display = gr.Markdown(value="_Results will be rendered using academic context._")

    # Both the button click and the textbox submit event run the same handler.
    submit_btn.click(fn=process_query, inputs=user_input, outputs=output_display)
    user_input.submit(fn=process_query, inputs=user_input, outputs=output_display)

if __name__ == "__main__":
    demo.launch()