rayymaxx committed on
Commit
868c437
·
1 Parent(s): 2aa641a
.gitignore ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.pyc
4
+ *.pyo
5
+ *.pyd
6
+ *.db
7
+
8
+ # Env
9
+ .env
10
+ .venv/
11
+ venv/
12
+
13
+ # IDEs
14
+ .vscode/
15
+ .idea/
16
+
17
+ # OS
18
+ .DS_Store
19
+ Thumbs.db
Dockerfile ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ WORKDIR /app
4
+
5
+ COPY requirements.txt .
6
+ RUN pip install --no-cache-dir -r requirements.txt
7
+
8
+ COPY . .
9
+
10
+ EXPOSE 7860
11
+
12
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from backend.server import app
backend/__init__.py ADDED
File without changes
backend/chains/__init__.py ADDED
File without changes
backend/chains/router.py ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+
3
+ load_dotenv()
4
+
5
+ from langchain_chroma import Chroma
6
+ from langchain_huggingface import HuggingFaceEmbeddings
7
+ from backend.llms.custom import CustomChatModel
8
+ from langchain_openai import ChatOpenAI
9
+ from langchain_core.runnables import (
10
+ RunnableBranch,
11
+ RunnableLambda,
12
+ RunnableParallel,
13
+ RunnablePassthrough,
14
+ )
15
+ from langchain_core.output_parsers import StrOutputParser
16
+ from langchain_core.messages import get_buffer_string
17
+ from langchain_core.runnables.history import RunnableWithMessageHistory
18
+ from langchain_community.chat_message_histories import ChatMessageHistory
19
+
20
+ from backend.schemas.api_models import ChatInput, ChatOutput
21
+ from backend.prompts.templates import (
22
+ rag_prompt,
23
+ quiz_generator_prompt,
24
+ flashcard_generator_prompt,
25
+ condense_question_prompt,
26
+ )
27
+
28
# Embedding model used to query the persisted Chroma collection.
_embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# NOTE(review): the directory is relative, so it is resolved against the
# process working directory at import time -- confirm this matches where the
# index is actually shipped.
vector_store = Chroma(
    persist_directory="app/vector_store",
    embedding_function=_embedding_function,
)

# General-purpose model; also the fallback for the fine-tuned endpoint below.
openai_llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.1)

# Remote fine-tuned model; any failure it raises is retried on ``openai_llm``.
_finetuned_endpoint = "https://nutnell-directed-ai.hf.space/generate"
finetuned_llm = CustomChatModel(api_url=_finetuned_endpoint).with_fallbacks(
    [openai_llm]
)
36
+
37
+
38
def format_docs(docs):
    """Join the text of retrieved documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.
            ``None`` is treated like an empty collection.

    Returns:
        The page contents separated by a ``---`` line, or ``""`` when there
        are no documents.
    """
    # Guard against a missing result so the prompt still renders.
    if not docs:
        return ""
    return "\n---\n".join(doc.page_content for doc in docs)
40
+
41
+
42
def get_sources_from_docs(docs):
    """Build the list of citation entries for a set of retrieved documents.

    Args:
        docs: Iterable of objects exposing a ``metadata`` mapping. ``None`` is
            treated like an empty collection.

    Returns:
        A list of ``{"source": ..., "name": ...}`` dicts taken from the
        ``source_url`` / ``source_name`` metadata keys, in first-seen order.
        Missing or ``None`` values become ``""`` and repeated
        (source, name) pairs are reported only once.
    """
    sources = []
    seen = set()
    for doc in docs or ():
        metadata = getattr(doc, "metadata", None) or {}
        # ``or ""`` (not a ``.get`` default) so an explicit ``None`` stored in
        # the metadata cannot leak into fields that are declared as strings.
        entry = {
            "source": metadata.get("source_url") or "",
            "name": metadata.get("source_name") or "",
        }
        key = (entry["source"], entry["name"])
        # Several chunks of the same document would otherwise be cited
        # once per chunk.
        if key in seen:
            continue
        seen.add(key)
        sources.append(entry)
    return sources
50
+
51
+
52
+ memories = {}
53
+
54
+
55
def get_memory_for_session(session_id: str):
    """Return the chat history stored for ``session_id``.

    A new, empty ``ChatMessageHistory`` is created and registered in the
    module-level ``memories`` dict the first time a session id is seen.
    """
    try:
        return memories[session_id]
    except KeyError:
        history = memories[session_id] = ChatMessageHistory()
        return history
59
+
60
+
61
def EducationalRetriever(k: int = 5):
    """Component 1: Identifies relevant curriculum content.

    Args:
        k: Number of documents to request per query. Defaults to 5, the value
            that was previously hard-coded.

    Returns:
        A retriever over the module-level ``vector_store``.
    """
    return vector_store.as_retriever(search_kwargs={"k": k})
64
+
65
+
66
def AdaptiveConversationChain():
    """Component 2: Produces personalized explanations using structured prompts and context.

    The returned runnable expects a dict with ``input`` and, optionally,
    ``chat_history``, ``subject`` and ``difficulty_level``. It:

    1. condenses ``input`` plus the chat history into ``standalone_question``
       using ``openai_llm``;
    2. retrieves ``context`` documents for that standalone question;
    3. produces ``answer`` (RAG prompt through ``finetuned_llm``) and
       ``sources`` (citations of the retrieved documents) in parallel.

    Returns:
        A runnable whose output is a dict with ``answer`` and ``sources``.
    """
    retriever = EducationalRetriever()

    condense_question_chain = (
        RunnableLambda(
            lambda x: {
                "question": x["input"],
                # Tolerate a missing/None history so the chain also works when
                # it is invoked without the message-history wrapper.
                "chat_history": get_buffer_string(x.get("chat_history") or []),
            }
        )
        | condense_question_prompt
        | openai_llm
        | StrOutputParser()
    )

    retrieve_context = (
        RunnableLambda(lambda x: x["standalone_question"]) | retriever
    ).with_config({"run_name": "EducationalRetriever"})

    answer_chain = (
        RunnableLambda(
            lambda x: {
                "context": format_docs(x["context"]),
                # The prompt receives the user's original wording; the
                # standalone question is only used for retrieval.
                "question": x["input"],
                # ``or`` instead of a ``.get`` default: the optional fields can
                # be present with an explicit None, which ``.get`` would pass
                # straight into the prompt.
                "subject": x.get("subject") or "the topic",
                "difficulty_level": x.get("difficulty_level") or "beginner",
            }
        )
        | rag_prompt
        # NOTE(review): the trailing "." in this run name looks accidental; it
        # is kept as-is in case anything filters traces/events by name.
        | finetuned_llm.with_config({"run_name": "AdaptiveConversationChain."})
        | StrOutputParser()
    )

    return RunnablePassthrough.assign(
        standalone_question=condense_question_chain
    ).assign(context=retrieve_context) | RunnableParallel(
        answer=answer_chain,
        sources=RunnableLambda(lambda x: get_sources_from_docs(x["context"])),
    )
102
+
103
+
104
def ContentGenerator():
    """Component 3: Creates practice questions, flashcards, and assessments.

    Returns a ``RunnableBranch`` that dispatches on ``request_type``:
    ``"quiz_generation"`` and ``"flashcard_creation"`` each retrieve context
    for ``input`` and run the matching prompt through ``finetuned_llm``; any
    other value yields a fixed "unknown content type" answer with no sources.
    Every branch produces a dict with ``answer`` and ``sources`` keys.

    NOTE(review): ``input`` is only used as the retrieval query here; it is not
    forwarded to either generation prompt.
    """
    retriever = EducationalRetriever()

    def _generation_chain(prompt, to_prompt_inputs, retriever_run_name, llm_run_name):
        """Retrieve context for ``input``, then render ``prompt`` through the fine-tuned LLM."""
        return RunnablePassthrough.assign(
            context=(RunnableLambda(lambda x: x["input"]) | retriever).with_config(
                {"run_name": retriever_run_name}
            )
        ) | RunnableParallel(
            answer=(
                RunnableLambda(to_prompt_inputs)
                | prompt
                | finetuned_llm.with_config({"run_name": llm_run_name})
                | StrOutputParser()
            ),
            sources=RunnableLambda(lambda x: get_sources_from_docs(x["context"])),
        )

    # ``or`` instead of a ``.get`` default below: the optional request fields
    # can be present with an explicit None, which ``.get`` would pass straight
    # into the prompt.
    quiz_generation_chain = _generation_chain(
        quiz_generator_prompt,
        lambda x: {
            "context": format_docs(x["context"]),
            "subject": x.get("subject") or "the provided topic",
            "difficulty_level": x.get("difficulty_level") or "intermediate",
        },
        "EducationalRetriever_Quiz",
        "QuizGenerator",
    )

    flashcard_generation_chain = _generation_chain(
        flashcard_generator_prompt,
        lambda x: {
            "context": format_docs(x["context"]),
            "difficulty_level": x.get("difficulty_level") or "beginner",
        },
        "EducationalRetriever_Flashcard",
        "FlashcardGenerator",
    )

    return RunnableBranch(
        (lambda x: x.get("request_type") == "quiz_generation", quiz_generation_chain),
        (
            lambda x: x.get("request_type") == "flashcard_creation",
            flashcard_generation_chain,
        ),
        # Default branch: keep the answer/sources output shape for unknown types.
        RunnableLambda(
            lambda x: {"answer": "Unknown content type requested.", "sources": []}
        ),
    )
157
+
158
+
159
def LearningAnalyzer():
    """Component 4: Monitors user engagement and adapts response approaches (Placeholder).

    Returns a pass-through runnable named ``LearningAnalyzer`` that prints the
    ``input`` entry of whatever dict flows through it and forwards that dict
    unchanged.
    """

    def analyze(payload):
        # NOTE(review): in this module the analyzer is piped after chains whose
        # output carries "answer"/"sources", so "input" looks likely to be
        # missing (printed as None) at this point -- confirm.
        user_input = payload.get("input")
        print("LOG: LearningAnalyzer executed. User input:", user_input)
        return payload

    analyzer = RunnableLambda(analyze)
    return analyzer.with_config({"run_name": "LearningAnalyzer"})
167
+
168
+
169
def run_educational_assistant():
    """Central function that invokes components based on user requests.

    Requests whose ``request_type`` is ``"tutoring"`` go to the adaptive
    conversation chain; everything else is handed to the content generator.
    """
    tutoring_route = (
        lambda x: x.get("request_type") == "tutoring",
        AdaptiveConversationChain(),
    )
    default_route = ContentGenerator()
    return RunnableBranch(tutoring_route, default_route)
175
+
176
+
177
# Router (tutoring vs. content generation) followed by the analyzer step.
educational_assistant_chain = run_educational_assistant() | LearningAnalyzer()

# Same pipeline wrapped with per-session message history: the user's text is
# read from "input", prior turns are injected as "chat_history", and the
# "answer" entry of the output is what gets recorded as the AI message.
_history_aware_chain = RunnableWithMessageHistory(
    educational_assistant_chain,
    get_memory_for_session,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)
chat_chain_with_history = _history_aware_chain.with_types(
    input_type=ChatInput, output_type=ChatOutput
)

# History-free pipeline for quiz/flashcard generation.
_content_pipeline = ContentGenerator() | LearningAnalyzer()
content_generation_chain = _content_pipeline.with_types(
    input_type=ChatInput, output_type=ChatOutput
)
190
+
191
+
backend/llms/custom.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import json
3
+ from typing import Any, List, Optional
4
+ from langchain_core.callbacks.manager import CallbackManagerForLLMRun
5
+ from langchain_core.language_models.chat_models import SimpleChatModel
6
+ from langchain_core.messages import BaseMessage, AIMessage
7
+
8
class CustomChatModel(SimpleChatModel):
    """A custom chat model that calls a remote FastAPI endpoint.

    The content of the last message is POSTed as ``{"prompt": ...}`` to
    ``api_url``. The endpoint is expected to answer with a JSON object whose
    ``response`` string contains an assistant header marker followed by the
    generated text.
    """

    # URL of the remote generation endpoint.
    api_url: str
    # Seconds to wait for the endpoint. Without a timeout ``requests`` can
    # block indefinitely, so a stalled endpoint would never raise and any
    # configured fallback model would never get a chance to run.
    request_timeout: float = 120.0

    @property
    def _llm_type(self) -> str:
        """Identifier reported for this model type."""
        return "custom_chat_model"

    def _call(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Send the last message to the endpoint and return the assistant text.

        Args:
            messages: Conversation messages; only the last one is sent.
            stop: Accepted for interface compatibility; not used.
            run_manager: Accepted for interface compatibility; not used.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            The text following the assistant header, with the end-of-turn
            token removed and surrounding whitespace stripped.

        Raises:
            requests.exceptions.RequestException: On transport errors,
                timeouts, non-2xx statuses or an undecodable body.
            ValueError: If the payload has no parsable, non-empty assistant
                section.
        """
        assistant_header = "<|start_header_id|>assistant<|end_header_id|>\n\n"
        end_of_turn = "<|eot_id|>"
        payload = {"prompt": messages[-1].content}

        try:
            # ``json=`` serializes the payload and sets the JSON content type.
            response = requests.post(
                self.api_url, json=payload, timeout=self.request_timeout
            )
            response.raise_for_status()

            result = response.json()
            full_text = result.get("response") if isinstance(result, dict) else None
            # A non-object body or a non-string "response" is a parse failure,
            # reported through the same handled path as a missing marker.
            if not isinstance(full_text, str):
                raise ValueError("Could not parse the model's response.")

            parts = full_text.split(assistant_header)
            if len(parts) < 2:
                raise ValueError("Could not parse the model's response.")

            assistant_response = parts[1].replace(end_of_turn, "").strip()
            if not assistant_response:
                raise ValueError("Model returned an empty response.")
            return assistant_response

        except (requests.exceptions.RequestException, ValueError) as e:
            # Log, then re-raise so a wrapping fallback can take over.
            print(f"Custom model failed: {e}. Attempting fallback.")
            raise
backend/prompts/__init__.py ADDED
File without changes
backend/prompts/templates.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_core.prompts import ChatPromptTemplate
2
+
3
+ CONDENSE_QUESTION_PROMPT_TEMPLATE = """
4
+ Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
5
+
6
+ Chat History:
7
+ {chat_history}
8
+
9
+ Follow Up Input: {question}
10
+ Standalone question:"""
11
+ condense_question_prompt = ChatPromptTemplate.from_template(CONDENSE_QUESTION_PROMPT_TEMPLATE)
12
+
13
+ RAG_PROMPT_TEMPLATE = """
14
+ You are a helpful AI assistant for the DirectEd learning platform.
15
+ Answer the user's question about the subject of '{subject}' based only on the following context.
16
+ Your explanation should be tailored for a '{difficulty_level}' level.
17
+ Cite the source name and URL if possible.
18
+ If you don't know the answer, just say that you don't know.
19
+
20
+ Context:
21
+ {context}
22
+
23
+ Question:
24
+ {question}
25
+
26
+ Helpful Answer:
27
+ """
28
+ rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT_TEMPLATE)
29
+
30
+
31
+ QUIZ_GENERATOR_PROMPT_TEMPLATE = """
32
+ You are an expert quiz creator for a tech learning platform.
33
+ Your task is to create at least 5-question multiple-choice quiz based on the provided context for the subject of '{subject}'.
34
+ The questions should be of '{difficulty_level}' difficulty and relevant to the context.
35
+ Provide the question, four options (A, B, C, D), and the correct answer.
36
+ You can generate more than five if the user requests it.
37
+
38
+ Format your response as follows:
39
+ 1. [Question 1]
40
+ A) [Option A]
41
+ B) [Option B]
42
+ C) [Option C]
43
+ D) [Option D]
44
+ Correct Answer: [A, B, C, or D]
45
+
46
+ 2. [Question 2]
47
+ ...
48
+
49
+ Context:
50
+ {context}
51
+
52
+ Quiz Questions:
53
+ """
54
+ quiz_generator_prompt = ChatPromptTemplate.from_template(QUIZ_GENERATOR_PROMPT_TEMPLATE)
55
+
56
+
57
+ FLASHCARD_GENERATOR_PROMPT_TEMPLATE = """
58
+ You are an expert instructional designer for the DirectEd learning platform.
59
+ Based on the provided context, create at least a set of 5 concise flashcards of '{difficulty_level}' difficulty to help a user study.
60
+ Each flashcard should have a 'Front' (a key term or question) and a 'Back' (a clear, simple definition or answer).
61
+
62
+ Format your response exactly as follows:
63
+ **Front:** [Term 1]
64
+ **Back:** [Definition 1]
65
+
66
+ **Front:** [Term 2]
67
+ **Back:** [Definition 2]
68
+
69
+ ...
70
+
71
+ Context:
72
+ {context}
73
+
74
+ Flashcards:
75
+ """
76
+ flashcard_generator_prompt = ChatPromptTemplate.from_template(FLASHCARD_GENERATOR_PROMPT_TEMPLATE)
backend/schemas/__init__.py ADDED
File without changes
backend/schemas/api_models.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+ from typing import List, Literal, Optional
3
+
4
class ChatInput(BaseModel):
    """Request payload accepted by the assistant endpoints."""

    # Required: the question to answer or the topic to generate content for.
    input: str = Field(
        description="The user's question or the topic for content generation.",
        examples=["What is the difference between MLOps and LLMOps?"],
    )
    # Required: who is asking.
    user_type: Literal["student", "instructor"] = Field(
        description="The type of user making the request.",
        examples=["student"],
    )
    # Required: selects which chain handles the request.
    request_type: Literal["tutoring", "quiz_generation", "flashcard_creation"] = Field(
        description="The type of request, e.g., a tutoring question or a request to generate content.",
        examples=["quiz_generation"],
    )

    # Optional: defaults to None when omitted.
    subject: Optional[str] = Field(
        default=None,
        description="The subject or topic, can be used for filtering or context.",
        examples=["LLMOps Fundamentals"],
    )
    # Optional: defaults to None when omitted.
    difficulty_level: Optional[Literal["beginner", "intermediate", "advanced"]] = Field(
        default=None,
        description="The desired difficulty level for the response or content.",
        examples=["beginner"],
    )
31
+
32
class Source(BaseModel):
    """A single citation attached to an answer; both fields are required strings."""

    source: str = Field(description="The URL of the source document.")
    name: str = Field(
        description="The name of the source (e.g., 'DirectEd Curriculum')."
    )
35
+
36
class ChatOutput(BaseModel):
    """Response payload: the generated text plus optional citations."""

    answer: str = Field(description="The AI-generated answer or content.")
    # Optional: None when no sources are supplied.
    sources: Optional[List[Source]] = Field(
        default=None,
        description="A list of source documents used for the answer.",
    )
backend/server.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from fastapi import FastAPI, Depends, HTTPException, status
3
+
4
+ from langserve import add_routes
5
+ from fastapi.middleware.cors import CORSMiddleware
6
+ from dotenv import load_dotenv
7
+
8
+ from backend.chains.router import educational_assistant_chain, chat_chain_with_history, content_generation_chain
9
+ from backend.schemas.api_models import ChatInput
10
+
11
+ load_dotenv()
12
+
13
app = FastAPI(
    title="DirectEd AI Assistant Server",
    version="1.0",
    description="A multi-functional API server for the DirectEd AI assistant.",
)

# Allowed origins come from CORS_ALLOW_ORIGINS (comma-separated). When the
# variable is unset or empty, every origin is allowed, as before.
_cors_origins = [
    origin.strip()
    for origin in os.getenv("CORS_ALLOW_ORIGINS", "*").split(",")
    if origin.strip()
] or ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_origins,
    # Credentialed cross-origin requests must not be combined with a wildcard
    # origin: that would let any website call the API with the user's
    # cookies. Credentials are therefore enabled only for explicit origins.
    allow_credentials="*" not in _cors_origins,
    allow_methods=["*"],
    allow_headers=["*"],
)
23
+
24
+
25
# Mount each chain under its public path, in the same order as before.
for _chain, _path in (
    (chat_chain_with_history, "/api/assistant/chat"),
    (content_generation_chain, "/api/assistant/content/generate"),
):
    add_routes(app, _chain, path=_path)
36
+
37
@app.get("/api/assistant/analytics")
def get_analytics():
    """Placeholder endpoint for retrieving usage analytics.

    Returns a static status payload; no analytics are computed.
    """
    placeholder = {
        "status": "ok",
        "message": "Analytics endpoint is under development.",
    }
    return placeholder
41
+
42
+
43
@app.get("/")
def read_root():
    """Health check endpoint: returns a fixed status message."""
    health = {"status": "DirectEd AI Assistant is running"}
    return health
47
+
48
+
requirements.txt ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ selenium
2
+ webdriver-manager
3
+ pypdf
4
+ fastapi==0.111.0
5
+ pydantic==2.8.2
6
+ uvicorn==0.30.1
7
+ python-dotenv==1.0.1
8
+ langserve==0.3.1
9
+
10
+ langchain
11
+ langchain-core
12
+ langchain-community
13
+ langchain-openai
14
+ langchain-chroma
15
+ langchain-huggingface
16
+
17
+ sentence-transformers
18
+ chromadb
19
+ requests
20
+ beautifulsoup4
21
+ sse_starlette