app.py
CHANGED
@@ -1,14 +1,14 @@
 import os
 from dotenv import load_dotenv
-import streamlit as st
+import gradio as gr  # Changed from streamlit to gradio

 # ─── 1. Load environment variables ─────────────────────────────────────────
 load_dotenv()
 COHERE_API_KEY = os.getenv("COHERE_API_KEY")
 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
+
 if not COHERE_API_KEY or not GEMINI_API_KEY:
-
-    st.stop()
+    raise ValueError("⚠️ Missing COHERE_API_KEY or GEMINI_API_KEY in environment")

 # ─── 2. Initialize vector store and embedder clients ───────────────────────
 import cohere
@@ -20,42 +20,50 @@ from google.genai import types
 co = cohere.Client(COHERE_API_KEY)

 # Gemini client for generation
-# Initialize with API key; will also respect GOOGLE_API_KEY env var
 genai_client = genai.Client(api_key=GEMINI_API_KEY)

 # Chroma vector store client
 client = chromadb.Client()
+
 # Create or get existing collection
 collection = client.get_or_create_collection(name="inha-well", embedding_function=None)

 # ─── 3. Ingestion & Embedding (run only once) ──────────────────────────────
 # Check if collection is empty to avoid re-ingesting on each run
 total_docs = collection.count() if hasattr(collection, 'count') else len(collection.get()['documents'])
+
 if total_docs == 0:
     content_chunks = []
     for i in range(1, 4):
         # Build the absolute path to each docs folder
         folder_path = f"docs/p0000{i}"
+
+        # Add error handling for missing folders
+        if not os.path.exists(folder_path):
+            print(f"Warning: Folder {folder_path} not found")
+            continue
+
         for filename in os.listdir(folder_path):
             if filename.endswith(".txt"):
                 with open(os.path.join(folder_path, filename), "r") as f:
                     content = f.read()
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                content_chunks.append(f"search_document: {content}")
+
+    if content_chunks:
+        response = co.embed(
+            texts=content_chunks,
+            model="embed-english-v3.0",
+            input_type="search_document"
+        )
+        embeddings = response.embeddings
+
+        collection.add(
+            ids=[str(i) for i in range(len(content_chunks))],
+            documents=content_chunks,
+            embeddings=embeddings
+        )

-# ─── 4. Retrieval & Prompt Utilities
+# ─── 4. Retrieval & Prompt Utilities ────────────────────────────────────────
 def retrieve_context(question, collection, top_k=2):
     qr = co.embed(
         texts=[question],
@@ -66,7 +74,6 @@ def retrieve_context(question, collection, top_k=2):
     results = collection.query(query_embeddings=[emb], n_results=top_k)
     return "\n".join(results["documents"][0])

-
 def get_prompt_plain(context: str, question: str) -> str:
     return f"""
 <<START>>
@@ -74,13 +81,13 @@ You are a responsible person for answering Inha University (South Korea) informa
 Provide concise, well-structured, answer-oriented responses. Do not repeat the prompt text in your output.

 Context:
-"
+"{context}"

 Question: {question}
+
 Answer:
 <<END>>"""

-
 def generate_agent_answer(context: str, question: str) -> str:
     prompt = get_prompt_plain(context, question)
     response = genai_client.models.generate_content(
@@ -94,24 +101,52 @@ def generate_agent_answer(context: str, question: str) -> str:
     )
     return response.text.strip()

-
 def rag_answer(question: str, collection) -> str:
     context = retrieve_context(question, collection, top_k=1)
     return generate_agent_answer(context, question)

-# ─── 5.
-
-
-
-
-    )
-
-
-
-        placeholder="e.g. What clubs are available in the 4th semester?"
-    )
-if st.button("🔍 Get Answer"):
-    with st.spinner("Retrieving answer…"):
+# ─── 5. Gradio Interface Function ───────────────────────────────────────────
+def answer_question(question):
+    """
+    Main function that processes the question and returns the answer
+    """
+    if not question.strip():
+        return "Please enter a question about Inha University."
+
+    try:
         answer = rag_answer(question, collection)
-
-
+        return answer
+    except Exception as e:
+        return f"Sorry, I encountered an error: {str(e)}"
+
+# ─── 6. Gradio Frontend ──────────────────────────────────────────────────────
+# Create the Gradio interface
+demo = gr.Interface(
+    fn=answer_question,
+    inputs=gr.Textbox(
+        label="Ask me anything about Inha University…",
+        placeholder="e.g. What clubs are available in the 4th semester?",
+        lines=2
+    ),
+    outputs=gr.Textbox(
+        label="💬 Answer",
+        lines=8,
+        show_copy_button=True
+    ),
+    title="🎓 Inha University Info Assistant",
+    description="Get answers to your questions about Inha University using AI-powered search.",
+    theme=gr.themes.Soft(),
+    examples=[
+        ["What clubs are available in the 4th semester?"],
+        ["Tell me about the admission requirements."],
+        ["What are the campus facilities?"]
+    ]
+)
+
+# Launch the app
+if __name__ == "__main__":
+    demo.launch(
+        share=True,             # Creates a public link
+        server_name="0.0.0.0",  # Allows external access
+        server_port=7860        # Default port for Hugging Face Spaces
+    )
|
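A quick way to verify the new ingestion block is to inspect the collection after startup. This is a sketch, not part of the commit; it assumes the same in-memory `collection` and at least one populated docs/p0000{i} folder:

    # Both calls are standard chromadb Collection methods.
    print(collection.count())           # number of stored chunks; > 0 once ingestion ran
    peek = collection.get(limit=1)      # fetch one stored document for inspection
    print(peek["documents"][0][:100])   # first 100 characters of the first chunk

Note that chromadb.Client() is ephemeral (in-memory) by default, so ingestion re-runs in every fresh process; a chromadb.PersistentClient would avoid that.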
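The retrieval-to-generation path can be exercised from a REPL without any UI; a sketch assuming both API keys are set and ingestion has populated the collection:

    question = "What clubs are available in the 4th semester?"
    context = retrieve_context(question, collection, top_k=2)  # raw retrieved chunks
    print(context)
    print(rag_answer(question, collection))                    # retrieval + Gemini generation

Note that rag_answer retrieves with top_k=1, so the context printed above can be broader than what the model actually sees.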
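Because answer_question does its own input validation and error handling, it can be smoke-tested directly before launching the interface; a sketch:

    print(answer_question(""))                                 # hits the empty-input guard
    print(answer_question("What are the campus facilities?"))  # full RAG path; needs valid keys

With share=True, demo.launch() additionally prints a temporary public *.gradio.live URL alongside the local 0.0.0.0:7860 server.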