FocusFlow Assistant committed on
Commit ·
46af955
0
Parent(s):
Initial commit of FocusFlow backend
Browse files- .gitignore +8 -0
- backend/database.py +41 -0
- backend/main.py +115 -0
- backend/rag_engine.py +76 -0
- requirements.txt +10 -0
.gitignore
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
venv/
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.pyc
|
| 4 |
+
chroma_db/
|
| 5 |
+
*.db
|
| 6 |
+
data/
|
| 7 |
+
.DS_Store
|
| 8 |
+
.env
|
backend/database.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sqlalchemy import create_engine, Column, Integer, String, Boolean, ForeignKey
from sqlalchemy.orm import declarative_base, sessionmaker

# SQLite file created in the process's working directory.
DATABASE_URL = "sqlite:///./focusflow.db"

# check_same_thread=False is required because the web framework may use the
# same SQLite connection from more than one thread.
engine = create_engine(DATABASE_URL, connect_args={"check_same_thread": False})
# Session factory: explicit commits, no autoflush; one session per request.
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
# Declarative base shared by all ORM models below.
Base = declarative_base()
| 10 |
+
class Source(Base):
    """An uploaded study document tracked for retrieval."""

    __tablename__ = "sources"

    id = Column(Integer, primary_key=True, index=True)
    filename = Column(String, index=True)
    # Original comment says "online/offline", but the upload endpoint stores
    # "local" — TODO(review): confirm the intended value set.
    type = Column(String)  # online/offline
    file_path = Column(String)  # path on disk where the upload was saved
    is_active = Column(Boolean, default=True)  # soft toggle; no row is deleted
| 19 |
+
class Schedule(Base):
    """One topic planned for a given study day."""

    __tablename__ = "schedule"

    id = Column(Integer, primary_key=True, index=True)
    date = Column(String, index=True)  # stored as "YYYY-MM-DD" text
    topic_name = Column(String)
    is_completed = Column(Boolean, default=False)
    # Topics start locked; unlocked by passing the previous topic's quiz.
    is_locked = Column(Boolean, default=True)
| 28 |
+
class Mastery(Base):
    """Latest quiz/flashcard progress for a topic, keyed by topic name."""

    __tablename__ = "mastery"

    id = Column(Integer, primary_key=True, index=True)
    topic_name = Column(String, index=True)  # no FK to Schedule; matched by name
    quiz_score = Column(Integer, default=0)  # most recent score (overwritten on retake)
    flashcard_status = Column(String, default="Not Started")
| 36 |
+
# Create tables
def init_db():
    """Create all ORM-declared tables if they do not already exist."""
    Base.metadata.create_all(bind=engine)

if __name__ == "__main__":
    init_db()
|
backend/main.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI, Depends, UploadFile, File, HTTPException
from sqlalchemy.orm import Session
from backend.database import SessionLocal, engine, Source, Schedule, Mastery, init_db
from backend.rag_engine import ingest_document, query_knowledge_base
import shutil
import os
from pydantic import BaseModel
from typing import List

# Create tables before the app starts serving requests.
init_db()

app = FastAPI(title="FocusFlow Backend")
| 15 |
+
# Dependency: one SQLAlchemy session per request, always closed afterwards.
def get_db():
    """Yield a request-scoped database session and close it on exit."""
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
| 23 |
+
# Pydantic Models
class ScheduleItem(BaseModel):
    """Serialized view of a Schedule row, returned by GET /schedule/{date}."""

    id: int
    date: str  # "YYYY-MM-DD"
    topic_name: str
    is_completed: bool
    is_locked: bool
| 31 |
+
class UnlockRequest(BaseModel):
    """Payload for POST /unlock_topic: which Schedule row was quizzed and the score."""

    topic_id: int  # Schedule.id of the topic just quizzed
    quiz_score: int  # compared against a threshold of 60 — presumably 0-100; confirm with frontend
| 35 |
+
class UnlockResponse(BaseModel):
    """Outcome of an unlock attempt."""

    success: bool
    message: str
    next_topic_unlocked: bool  # True only when a following topic was actually unlocked
| 40 |
+
@app.post("/upload")
async def upload_file(file: UploadFile = File(...), db: Session = Depends(get_db)):
    """Save an uploaded document, ingest it into the vector store, and record it.

    Returns the new Source row's id on success.

    Raises:
        HTTPException 400 if the upload carries no usable filename.
        HTTPException 500 if the file cannot be saved or ingestion fails
        (the saved file is removed in the latter case).
    """
    # basename() strips any client-supplied directory components, so a
    # filename like "../../etc/x" cannot escape the data/ directory.
    safe_name = os.path.basename(file.filename or "")
    if not safe_name:
        raise HTTPException(status_code=400, detail="Missing filename")

    # The very first upload may run before data/ exists.
    os.makedirs("data", exist_ok=True)
    file_location = f"data/{safe_name}"
    try:
        with open(file_location, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)
    except OSError as e:
        raise HTTPException(status_code=500, detail=f"Could not save file: {str(e)}")

    # Ingest; remove the orphaned file on failure so a retry starts clean.
    try:
        ingest_document(file_location)
    except Exception as e:
        try:
            os.remove(file_location)
        except OSError:
            pass  # best-effort cleanup; the 500 below is the real signal
        raise HTTPException(status_code=500, detail=f"Ingestion failed: {str(e)}")

    # Record the source so it can be listed/toggled later.
    new_source = Source(filename=safe_name, type="local", file_path=file_location, is_active=True)
    db.add(new_source)
    db.commit()
    db.refresh(new_source)

    return {"message": "File uploaded and ingested successfully", "id": new_source.id}
| 65 |
+
@app.get("/schedule/{date}", response_model=List[ScheduleItem])
def get_schedule(date: str, db: Session = Depends(get_db)):
    """Return every scheduled topic for the given day.

    `date` is expected in YYYY-MM-DD form, matching how Schedule.date is
    stored. An empty list is returned when nothing is scheduled.
    """
    # .all() already yields [] for no matches, which serializes to [].
    return db.query(Schedule).filter(Schedule.date == date).all()
| 74 |
+
@app.post("/unlock_topic", response_model=UnlockResponse)
def unlock_topic(request: UnlockRequest, db: Session = Depends(get_db)):
    """Record a quiz result and, on a passing score, unlock the next topic.

    Behavior:
      - 404 if `topic_id` does not match a Schedule row.
      - Upserts the Mastery record for the topic (latest score wins —
        Mastery is keyed by topic name since it has no FK to Schedule).
      - A score strictly above the passing threshold marks the topic
        complete and unlocks the next Schedule row by ascending id.
    """
    PASSING_SCORE = 60  # strictly-greater-than, as in the original check

    current_topic = db.query(Schedule).filter(Schedule.id == request.topic_id).first()
    if not current_topic:
        raise HTTPException(status_code=404, detail="Topic not found")

    # Upsert the mastery record for this topic name.
    mastery = db.query(Mastery).filter(Mastery.topic_name == current_topic.topic_name).first()
    if not mastery:
        mastery = Mastery(topic_name=current_topic.topic_name, quiz_score=request.quiz_score)
        db.add(mastery)
    else:
        mastery.quiz_score = request.quiz_score

    next_unlocked = False
    if request.quiz_score > PASSING_SCORE:
        current_topic.is_completed = True
        # "Next" = nearest row with a larger id (assumes sequential ids —
        # TODO(review): confirm schedule rows are created in study order).
        next_topic = (
            db.query(Schedule)
            .filter(Schedule.id > current_topic.id)
            .order_by(Schedule.id.asc())
            .first()
        )
        if next_topic:
            next_topic.is_locked = False
            next_unlocked = True
            message = "Quiz passed. Next topic unlocked."
        else:
            # Fix: the original reported "Next topic unlocked." even when no
            # further topic existed.
            message = "Quiz passed. No further topics to unlock."
    else:
        message = "Quiz score too low to unlock next topic."

    # One commit covers the mastery upsert and any schedule updates.
    db.commit()
    return {"success": True, "message": message, "next_topic_unlocked": next_unlocked}
| 113 |
+
@app.get("/query")
def query_kb(question: str):
    """Forward a natural-language question to the RAG engine and return its result."""
    result = query_knowledge_base(question)
    return result
|
backend/rag_engine.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
# NOTE(review): OllamaEmbeddings/Ollama in langchain_community are deprecated
# upstream in favor of the langchain-ollama package — confirm before upgrading.
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.llms import Ollama

# On-disk location of the persistent Chroma vector store.
CACHE_DIR = "./chroma_db"
| 10 |
+
def ingest_document(file_path: str):
    """Ingest a PDF document into the vector database.

    Loads the PDF, splits it into overlapping chunks, embeds them with
    Ollama, and persists them to the Chroma store.

    Returns the number of chunks ingested (0 when the PDF yields no
    extractable text, e.g. a scanned document).

    Raises:
        FileNotFoundError: if `file_path` does not exist.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    # Load PDF
    docs = PyPDFLoader(file_path).load()

    # Split text: 1000-char chunks with 200-char overlap for retrieval context.
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = splitter.split_documents(docs)

    # Guard: embedding an empty batch fails; a scanned/image-only PDF can
    # legitimately produce no chunks.
    if not splits:
        print(f"No text extracted from {file_path}; nothing ingested")
        return 0

    # Store in ChromaDB. Chroma persists automatically when persist_directory
    # is set in current versions.
    # NOTE(review): re-ingesting the same file appends duplicate chunks —
    # consider deterministic ids if re-uploads are expected.
    Chroma.from_documents(
        documents=splits,
        embedding=OllamaEmbeddings(model="nomic-embed-text"),
        persist_directory=CACHE_DIR,
    )
    print(f"Ingested {len(splits)} chunks from {file_path}")
    return len(splits)
| 34 |
+
def query_knowledge_base(question: str):
    """Answer `question` from the vector store.

    Returns a dict with "answer" (LLM-generated text) and "sources"
    (filename/page pairs for the retrieved chunks).
    """
    # Open the persisted store with the same embedding model used at ingest.
    store = Chroma(
        persist_directory=CACHE_DIR,
        embedding_function=OllamaEmbeddings(model="nomic-embed-text"),
    )

    # Top-3 nearest chunks by embedding similarity.
    hits = store.similarity_search(question, k=3)
    if not hits:
        return {"answer": "No relevant information found.", "sources": []}

    # Assemble the retrieved chunks into one context block for the prompt.
    fragments = []
    for hit in hits:
        fragments.append(
            f"Source: {hit.metadata.get('source', 'Unknown')}, Page: {hit.metadata.get('page', 'Unknown')}\nContent: {hit.page_content}"
        )
    context_str = "\n\n".join(fragments)

    # Generate the answer with a small local model.
    llm = Ollama(model="llama3.2:1b")
    prompt = f"""You are an intelligent study assistant.
Answer the question using the provided context, but explain it in your own words.
Make it sound natural and easy to understand, like a teacher explaining to a student.

Context:
{context_str}

Question: {question}
"""
    answer = llm.invoke(prompt)

    # Compact citation list for the caller/UI.
    sources = []
    for hit in hits:
        sources.append(
            {
                "source": os.path.basename(hit.metadata.get('source', '')),
                "page": hit.metadata.get('page', 0),
            }
        )

    return {"answer": answer, "sources": sources}
|
requirements.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
uvicorn
|
| 3 |
+
sqlalchemy
|
| 4 |
+
chromadb
|
| 5 |
+
langchain
|
| 6 |
+
langchain-community
|
| 7 |
+
langchain-chroma
|
| 8 |
+
pypdf
|
| 9 |
+
python-multipart
|
| 10 |
+
requests
|