mayzinoo committed on
Commit
bc4b2cc
·
verified ·
1 Parent(s): e375563

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -118
app.py CHANGED
@@ -1,135 +1,112 @@
1
- # app.py
2
- import gradio as gr
3
  import os
4
- from transformers import pipeline
5
- from sentence_transformers import SentenceTransformer
6
- import faiss
7
- import numpy as np
8
- import json
9
  import re
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
 
 
11
 
12
- # --- Load necessary components for the RAG system ---
13
- # These paths are relative to the Space's root directory
14
- FAISS_INDEX_PATH = "sol_faiss_index.bin"
15
- DOCUMENT_IDS_PATH = "sol_document_ids.json"
16
-
17
- # Load SentenceTransformer model
18
- # Ensure this model is downloaded or available in the environment
19
- # For Spaces, you might need to add it to requirements.txt or directly download if space has internet
20
- # It's better to declare it globally or as a shared resource.
21
- try:
22
- model = SentenceTransformer('all-mpnet-base-v2')
23
- except Exception as e:
24
- print(f"Error loading SentenceTransformer model: {e}")
25
- print("Attempting to load from local cache or download on first use.")
26
- # If running in a Space, the model will be downloaded to cache if not present.
27
- # Ensure you have internet access in your Space settings.
28
-
29
- # Load FAISS index
30
- try:
31
- index = faiss.read_index(FAISS_INDEX_PATH)
32
- except Exception as e:
33
- print(f"Error loading FAISS index: {e}")
34
- # Handle error, maybe create a dummy index or exit
35
- index = None # Placeholder if loading fails
36
-
37
- # Load document IDs
38
- try:
39
- with open(DOCUMENT_IDS_PATH, "r") as f:
40
- document_ids = json.load(f)
41
- except Exception as e:
42
- print(f"Error loading document IDs: {e}")
43
- document_ids = [] # Placeholder if loading fails
44
-
45
- # Placeholder for the actual content of "10 Geometry Mathematics Instructional Guide.pdf"
46
- # In a real deployed scenario, this content would be loaded from a file
47
- # that you upload to your Hugging Face Space or fetched at runtime.
48
- # For now, we'll assume it's available or that 'documents' are pre-processed and loaded.
49
- # You would typically load the 'documents' list created in Step 2 here.
50
- # For deployment, it's best to save the `documents` list (sol_data) as a JSON
51
- # and load it back. Let's add that.
52
-
53
- # Assuming you've saved sol_data as 'sol_documents.json'
54
- SOL_DOCUMENTS_PATH = "sol_documents.json"
55
- try:
56
- with open(SOL_DOCUMENTS_PATH, "r") as f:
57
- documents = json.load(f)
58
- except Exception as e:
59
- print(f"Error loading sol documents: {e}")
60
- documents = [] # Placeholder
61
-
62
- # Load LLM for generation
63
- # For a Hugging Face Space, you need to ensure the model is available.
64
- # 'google/gemma-2b-it' is a good option.
65
- # Ensure you set up environment variables or secrets for API keys if using paid models.
66
- try:
67
- # llm_pipeline = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")
68
- llm_pipeline = pipeline("text-generation", model="google/gemma-2b-it")
69
- except Exception as e:
70
- print(f"Error loading LLM pipeline: {e}")
71
- llm_pipeline = None # Placeholder
72
-
73
-
74
- def retrieve_and_generate_app(query, top_k=3):
75
- if not model or not index or not document_ids or not documents or not llm_pipeline:
76
- return "System not fully initialized. Please check logs for missing components."
77
 
78
- try:
79
- # 1. Query Embedding
80
- query_embedding = model.encode([query])
 
 
 
81
 
82
- # 2. Retrieval using FAISS
83
- D, I = index.search(query_embedding, top_k)
84
 
85
- retrieved_docs = []
86
- for i in I[0]:
87
- sol_id = document_ids[i]
88
- retrieved_content = next((doc["content"] for doc in documents if doc["id"] == sol_id), "Content not found.")
89
- retrieved_docs.append({"id": sol_id, "content": retrieved_content})
90
 
91
- # 3. Context Construction
92
- context = "\n\n".join([f"SOL {doc['id']}: {doc['content']}" for doc in retrieved_docs])
93
 
94
- # 4. LLM Generation
95
- prompt = f"""
96
- Given the following information about Virginia Standards of Learning (SOLs):
97
- {context}
98
- Based on this information, answer the following question:
99
- {query}
100
- If the question is about a specific SOL number, provide a direct explanation for that SOL.
101
- If asked for lesson plans, worksheets, or proofs, explain what the document generally entails and whether it provides such materials.
102
- Be concise and to the point.
103
  """
104
- print(f"\n--- PROMPT SENT TO LLM ---\n{prompt}\n--------------------------\n")
105
-
106
- response = llm_pipeline(prompt, max_new_tokens=500, num_return_sequences=1, do_sample=True, temperature=0.7)
107
-
108
- generated_text = response[0]['generated_text']
 
109
 
110
- print(f"\n--- RAW GENERATED TEXT ---\n{generated_text}\n--------------------------\n")
 
 
 
 
 
 
 
 
 
 
 
111
 
112
- answer_start_marker = f"Based on this information, answer the following question:\n{query}"
113
- if answer_start_marker in generated_text:
114
- answer = generated_text.split(answer_start_marker, 1)[1].strip()
115
- answer = re.sub(r'If the question is about a specific SOL number,.*?$', '', answer, flags=re.DOTALL).strip()
116
- else:
117
- answer = generated_text
 
 
118
 
119
- print(f"\n--- FINAL ANSWER ---\n{answer}\n--------------------\n")
120
- return answer if answer else "No valid response generated. Check logs for details."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  except Exception as e:
122
- print(f"\n--- ERROR ---\n{str(e)}\n------------\n")
123
- return f"An error occurred: {str(e)}. Please check the logs for more details."
124
-
125
- # Create Gradio interface
126
- demo = gr.Interface(
127
- fn=retrieve_and_generate_app,
128
- inputs=gr.Textbox(lines=2, placeholder="Enter your geometry-related question here..."),
129
- outputs=gr.Textbox(label="Generated Answer"),
130
- title="Virginia SOL Geometry Assistant",
131
- description="Ask questions about the Geometry SOL Instructional Guide"
 
 
132
  )
133
 
134
  if __name__ == "__main__":
135
- demo.launch()
 
 
 
1
  import os
 
 
 
 
 
2
  import re
3
+ import gradio as gr
4
+ from langchain.prompts import PromptTemplate
5
+ from langchain_openai import ChatOpenAI
6
+ from langchain.vectorstores import Chroma
7
+ from langchain.embeddings import HuggingFaceEmbeddings
8
+
9
+ # Load embedding model and vector store from persisted DB
10
+ embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
11
+ vector_store = Chroma(
12
+ embedding=embedding_model,
13
+ persist_directory="geometry_db", # relative folder inside your Hugging Face Space
14
+ collection_name="geometry_sol"
15
+ )
16
 
17
+ # Load OpenAI key (you must add this in Hugging Face Space Secrets)
18
+ os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
19
 
20
+ # Load the LLM (GPT-3.5)
21
+ llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.3)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
# Prompt templates, one per response mode. The raw text is kept separate from
# the PromptTemplate construction so new modes only need a new entry here.
_TEMPLATE_TEXTS = {
    "general": """
You are a strict assistant for the Virginia Geometry SOL.

Only use exact phrases from the following SOL text:
{context}

Answer the question: "{query}"

If the answer is in the SOL text, quote it exactly. Do not rephrase or summarize. Do not add your own explanation.

If the answer is not in the context, reply: "The answer is not found in the provided SOL text."
""",
    "lesson plan": """
Given the following retrieved SOL text:
{context}

Generate a Geometry lesson plan based on: "{query}"
Include:
1. Simple explanation of the concept.
2. Real-world example.
3. Engaging class activity.
Be concise and curriculum-aligned for high school.
""",
    "worksheet": """
{context}

Create a student worksheet for: "{query}"
Include: concept summary, a worked example, and 3 practice problems.
""",
    "proofs": """
{context}

Generate a proof-focused geometry lesson plan for: "{query}"
Include: student-friendly explanation, real-world link, and activity.
""",
}

# Every mode fills the same two slots: retrieved context and the user's query.
templates = {
    mode: PromptTemplate(input_variables=["context", "query"], template=text)
    for mode, text in _TEMPLATE_TEXTS.items()
}
73
+
74
+ # Optional: shortcut to solve simple math problems (like area of rectangle)
75
+ def try_math_solver(query):
76
+ match = re.search(r"rectangle.*l\s*=\s*(\d+).+w\s*=\s*(\d+)", query.lower())
77
+ if match:
78
+ l, w = int(match.group(1)), int(match.group(2))
79
+ return f"The area of the rectangle is {l} × {w} = {l * w} square units."
80
+ return None
81
+
82
# RAG pipeline: retrieve the closest SOL chunks, fill the mode's prompt
# template, and ask the LLM.
def rag_query(query, mode="general"):
    """Answer `query` via retrieval-augmented generation using the given mode's template."""
    hits = vector_store.similarity_search(query, k=2)
    context = "\n\n".join(hit.page_content for hit in hits)
    filled = templates[mode].format_prompt(context=context, query=query)
    return llm.invoke(filled.to_string()).content
88
+
89
# Gradio callback: try the cheap deterministic math shortcut first; only fall
# back to the (slower, paid) RAG pipeline when the shortcut doesn't apply.
def ask_geometry_sol(query, mode):
    """Answer a Geometry SOL question; returns an error string instead of raising."""
    shortcut = try_math_solver(query)
    if shortcut:
        return shortcut
    try:
        answer = rag_query(query, mode)
    except Exception as e:
        # Surface the failure in the UI rather than crashing the Space.
        return f"⚠️ Error: {type(e).__name__} - {str(e)}"
    return answer
98
+
99
# Gradio UI: a question box plus a mode selector feeding the callback above.
question_box = gr.Textbox(label="Enter your Geometry SOL question or topic")
mode_picker = gr.Radio(
    ["general", "lesson plan", "worksheet", "proofs"],
    value="general",
    label="Response type",
)
iface = gr.Interface(
    fn=ask_geometry_sol,
    inputs=[question_box, mode_picker],
    outputs="text",
    title="📘 Virginia Geometry SOL Assistant",
    description="Ask about any 2023 Geometry SOL (Standards of Learning). Get exact quotes, lesson plans, worksheets, or proof-based lessons.",
)

if __name__ == "__main__":
    iface.launch()