Jasur05 committed on
Commit
cb0bad9
·
1 Parent(s): 5202c52
Files changed (1) hide show
  1. app.py +73 -38
app.py CHANGED
@@ -1,14 +1,14 @@
1
  import os
2
  from dotenv import load_dotenv
3
- import streamlit as st # Streamlit frontend
4
 
5
  # ─── 1. Load environment variables ─────────────────────────────────────────
6
  load_dotenv()
7
  COHERE_API_KEY = os.getenv("COHERE_API_KEY")
8
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 
9
  if not COHERE_API_KEY or not GEMINI_API_KEY:
10
- st.error("❗️ Missing COHERE_API_KEY or GEMINI_API_KEY in environment")
11
- st.stop()
12
 
13
  # ─── 2. Initialize vector store and embedder clients ───────────────────────
14
  import cohere
@@ -20,42 +20,50 @@ from google.genai import types
20
  co = cohere.Client(COHERE_API_KEY)
21
 
22
  # Gemini client for generation
23
- # Initialize with API key; will also respect GOOGLE_API_KEY env var
24
  genai_client = genai.Client(api_key=GEMINI_API_KEY)
25
 
26
  # Chroma vector store client
27
  client = chromadb.Client()
 
28
  # Create or get existing collection
29
  collection = client.get_or_create_collection(name="inha-well", embedding_function=None)
30
 
31
  # ─── 3. Ingestion & Embedding (run only once) ──────────────────────────────
32
  # Check if collection is empty to avoid re-ingesting on each run
33
  total_docs = collection.count() if hasattr(collection, 'count') else len(collection.get()['documents'])
 
34
  if total_docs == 0:
35
  content_chunks = []
36
  for i in range(1, 4):
37
  # Build the relative path to each docs folder
38
  folder_path = f"docs/p0000{i}"
 
 
 
 
 
 
39
  for filename in os.listdir(folder_path):
40
  if filename.endswith(".txt"):
41
  with open(os.path.join(folder_path, filename), "r") as f:
42
  content = f.read()
43
- content_chunks.append(f"search_document: {content}")
44
-
45
- response = co.embed(
46
- texts=content_chunks,
47
- model="embed-english-v3.0",
48
- input_type="search_document"
49
- )
50
- embeddings = response.embeddings
51
-
52
- collection.add(
53
- ids=[str(i) for i in range(len(content_chunks))],
54
- documents=content_chunks,
55
- embeddings=embeddings
56
- )
 
57
 
58
- # ─── 4. Retrieval & Prompt Utilities & Prompt Utilities ────────────────────────────────────────
59
  def retrieve_context(question, collection, top_k=2):
60
  qr = co.embed(
61
  texts=[question],
@@ -66,7 +74,6 @@ def retrieve_context(question, collection, top_k=2):
66
  results = collection.query(query_embeddings=[emb], n_results=top_k)
67
  return "\n".join(results["documents"][0])
68
 
69
-
70
  def get_prompt_plain(context: str, question: str) -> str:
71
  return f"""
72
  <<START>>
@@ -74,13 +81,13 @@ You are a responsible person for answering Inha University (South Korea) informa
74
  Provide concise, well-structured, answer-oriented responses. Do not repeat the prompt text in your output.
75
 
76
  Context:
77
- ""{context}""
78
 
79
  Question: {question}
 
80
  Answer:
81
  <<END>>"""
82
 
83
-
84
  def generate_agent_answer(context: str, question: str) -> str:
85
  prompt = get_prompt_plain(context, question)
86
  response = genai_client.models.generate_content(
@@ -94,24 +101,52 @@ def generate_agent_answer(context: str, question: str) -> str:
94
  )
95
  return response.text.strip()
96
 
97
-
98
  def rag_answer(question: str, collection) -> str:
99
  context = retrieve_context(question, collection, top_k=1)
100
  return generate_agent_answer(context, question)
101
 
102
- # ─── 5. Streamlit Frontend ───────────────────────────────────────────────────
103
- st.set_page_config(
104
- page_title="Inha University Info Assistant",
105
- page_icon="📚",
106
- layout="centered"
107
- )
108
- st.title("Inha University Info Assistant")
109
- question = st.text_input(
110
- "Ask me anything about Inha University…",
111
- placeholder="e.g. What clubs are available in the 4th semester?"
112
- )
113
- if st.button("🔍 Get Answer"):
114
- with st.spinner("Retrieving answer…"):
115
  answer = rag_answer(question, collection)
116
- st.subheader("📌 Answer")
117
- st.write(answer)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  from dotenv import load_dotenv
3
+ import gradio as gr # Changed from streamlit to gradio
4
 
5
  # ─── 1. Load environment variables ─────────────────────────────────────────
6
  load_dotenv()
7
  COHERE_API_KEY = os.getenv("COHERE_API_KEY")
8
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
9
+
10
  if not COHERE_API_KEY or not GEMINI_API_KEY:
11
+ raise ValueError("❗️ Missing COHERE_API_KEY or GEMINI_API_KEY in environment")
 
12
 
13
  # ─── 2. Initialize vector store and embedder clients ───────────────────────
14
  import cohere
 
20
  co = cohere.Client(COHERE_API_KEY)
21
 
22
  # Gemini client for generation
 
23
  genai_client = genai.Client(api_key=GEMINI_API_KEY)
24
 
25
  # Chroma vector store client
26
  client = chromadb.Client()
27
+
28
  # Create or get existing collection
29
  collection = client.get_or_create_collection(name="inha-well", embedding_function=None)
30
 
31
  # ─── 3. Ingestion & Embedding (run only once) ──────────────────────────────
32
  # Check if collection is empty to avoid re-ingesting on each run
33
  total_docs = collection.count() if hasattr(collection, 'count') else len(collection.get()['documents'])
34
+
35
  if total_docs == 0:
36
  content_chunks = []
37
  for i in range(1, 4):
38
  # Build the relative path to each docs folder
39
  folder_path = f"docs/p0000{i}"
40
+
41
+ # Add error handling for missing folders
42
+ if not os.path.exists(folder_path):
43
+ print(f"Warning: Folder {folder_path} not found")
44
+ continue
45
+
46
  for filename in os.listdir(folder_path):
47
  if filename.endswith(".txt"):
48
  with open(os.path.join(folder_path, filename), "r") as f:
49
  content = f.read()
50
+ content_chunks.append(f"search_document: {content}")
51
+
52
+ if content_chunks:
53
+ response = co.embed(
54
+ texts=content_chunks,
55
+ model="embed-english-v3.0",
56
+ input_type="search_document"
57
+ )
58
+ embeddings = response.embeddings
59
+
60
+ collection.add(
61
+ ids=[str(i) for i in range(len(content_chunks))],
62
+ documents=content_chunks,
63
+ embeddings=embeddings
64
+ )
65
 
66
+ # ─── 4. Retrieval & Prompt Utilities ────────────────────────────────────────
67
  def retrieve_context(question, collection, top_k=2):
68
  qr = co.embed(
69
  texts=[question],
 
74
  results = collection.query(query_embeddings=[emb], n_results=top_k)
75
  return "\n".join(results["documents"][0])
76
 
 
77
  def get_prompt_plain(context: str, question: str) -> str:
78
  return f"""
79
  <<START>>
 
81
  Provide concise, well-structured, answer-oriented responses. Do not repeat the prompt text in your output.
82
 
83
  Context:
84
+ "{context}"
85
 
86
  Question: {question}
87
+
88
  Answer:
89
  <<END>>"""
90
 
 
91
  def generate_agent_answer(context: str, question: str) -> str:
92
  prompt = get_prompt_plain(context, question)
93
  response = genai_client.models.generate_content(
 
101
  )
102
  return response.text.strip()
103
 
 
104
  def rag_answer(question: str, collection) -> str:
105
  context = retrieve_context(question, collection, top_k=1)
106
  return generate_agent_answer(context, question)
107
 
108
+ # ─── 5. Gradio Interface Function ─────────────────────────────────────────────
109
def answer_question(question):
    """Answer a question submitted through the Gradio interface.

    Args:
        question: Text entered in the question box. ``None``, an empty string
            and whitespace-only text are all treated as "no question".

    Returns:
        The answer produced by ``rag_answer`` for the module-level
        ``collection``; a prompt asking for input when the question is blank;
        or an apology containing the error text when answering fails.
    """
    # Check for a missing value before calling ``.strip()`` so that ``None``
    # yields the friendly prompt instead of an AttributeError.
    if not question or not question.strip():
        return "Please enter a question about Inha University."

    try:
        return rag_answer(question, collection)
    except Exception as e:
        # UI boundary: report the failure as text rather than crashing the app.
        return f"Sorry, I encountered an error: {e}"
121
+
122
+ # ─── 6. Gradio Frontend ─────────────────────────────────────────────────────
123
+ # Create the Gradio interface
124
# Single-textbox Gradio UI: the typed question is passed to ``answer_question``
# and the returned text is shown in a copyable answer box.
demo = gr.Interface(
    fn=answer_question,
    inputs=gr.Textbox(
        label="Ask me anything about Inha University…",
        placeholder="e.g. What clubs are available in the 4th semester?",
        lines=2,
    ),
    outputs=gr.Textbox(
        # Emoji restored from mis-decoded UTF-8 bytes.
        label="📌 Answer",
        lines=8,
        show_copy_button=True,
    ),
    title="📚 Inha University Info Assistant",
    description="Get answers to your questions about Inha University using AI-powered search.",
    theme=gr.themes.Soft(),
    # Sample questions offered with the interface; one input value per row.
    examples=[
        ["What clubs are available in the 4th semester?"],
        ["Tell me about the admission requirements."],
        ["What are the campus facilities?"],
    ],
)
145
+
146
+ # Launch the app
147
if __name__ == "__main__":
    # Start the server only when this file is executed as a script.
    launch_options = {
        "share": True,  # request a public share link
        "server_name": "0.0.0.0",  # accept connections from outside localhost
        "server_port": 7860,  # noted in the original as the Hugging Face Spaces default
    }
    demo.launch(**launch_options)