FocusFlow Assistant committed on
Commit
46af955
·
0 Parent(s):

Initial commit of FocusFlow backend

Browse files
Files changed (5) hide show
  1. .gitignore +8 -0
  2. backend/database.py +41 -0
  3. backend/main.py +115 -0
  4. backend/rag_engine.py +76 -0
  5. requirements.txt +10 -0
.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ venv/
2
+ __pycache__/
3
+ *.pyc
4
+ chroma_db/
5
+ *.db
6
+ data/
7
+ .DS_Store
8
+ .env
backend/database.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from sqlalchemy import create_engine, Column, Integer, String, Boolean, ForeignKey
from sqlalchemy.orm import declarative_base, sessionmaker

# SQLite database file, created in the process's current working directory.
DATABASE_URL = "sqlite:///./focusflow.db"

# check_same_thread=False is required for SQLite here: FastAPI may run a
# request on a different thread than the one that opened the connection.
engine = create_engine(DATABASE_URL, connect_args={"check_same_thread": False})
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
Base = declarative_base()
9
+
10
class Source(Base):
    """An uploaded study material tracked by the application."""

    __tablename__ = "sources"

    id = Column(Integer, primary_key=True, index=True)
    filename = Column(String, index=True)  # original upload filename
    type = Column(String)  # online/offline
    file_path = Column(String)  # where the uploaded file was saved on disk
    is_active = Column(Boolean, default=True)  # soft-disable flag for this source
18
+
19
class Schedule(Base):
    """One topic scheduled for study on a given date."""

    __tablename__ = "schedule"

    id = Column(Integer, primary_key=True, index=True)
    date = Column(String, index=True)  # YYYY-MM-DD
    topic_name = Column(String)
    is_completed = Column(Boolean, default=False)  # set when the topic's quiz is passed
    is_locked = Column(Boolean, default=True)  # cleared when the previous topic's quiz is passed
27
+
28
class Mastery(Base):
    """Per-topic mastery state: latest quiz score and flashcard progress."""

    __tablename__ = "mastery"

    id = Column(Integer, primary_key=True, index=True)
    topic_name = Column(String, index=True)  # joined to Schedule rows by name, not FK
    quiz_score = Column(Integer, default=0)  # most recently recorded quiz score
    flashcard_status = Column(String, default="Not Started")
35
+
36
def init_db():
    """Create every ORM-mapped table; already-existing tables are left untouched."""
    Base.metadata.create_all(bind=engine)


if __name__ == "__main__":
    init_db()
backend/main.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI, Depends, UploadFile, File, HTTPException
from sqlalchemy.orm import Session
from backend.database import SessionLocal, engine, Source, Schedule, Mastery, init_db
from backend.rag_engine import ingest_document, query_knowledge_base
import shutil
import os
from pydantic import BaseModel
from typing import List

# Create tables at import time so the API works against a fresh database.
init_db()

app = FastAPI(title="FocusFlow Backend")
14
+
15
# Dependency
def get_db():
    """FastAPI dependency: yield a DB session and guarantee it is closed."""
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
22
+
23
# Pydantic Models
class ScheduleItem(BaseModel):
    """API representation of one Schedule row."""

    id: int
    date: str  # YYYY-MM-DD
    topic_name: str
    is_completed: bool
    is_locked: bool
30
+
31
class UnlockRequest(BaseModel):
    """Request body for POST /unlock_topic."""

    topic_id: int  # Schedule.id of the topic the quiz was taken for
    quiz_score: int  # compared against the >60 passing threshold in /unlock_topic
34
+
35
class UnlockResponse(BaseModel):
    """Response body for POST /unlock_topic."""

    success: bool
    message: str
    next_topic_unlocked: bool  # True only when a following topic exists and was unlocked
39
+
40
@app.post("/upload")
async def upload_file(file: UploadFile = File(...), db: Session = Depends(get_db)):
    """Save an uploaded document to ./data, ingest it into the vector store,
    and record it as an active Source row.

    Returns:
        A dict with a confirmation message and the new Source id.

    Raises:
        HTTPException: 400 for a missing filename, 500 if the file cannot be
        saved or ingestion fails (the saved file is removed on ingest failure).
    """
    # basename() guards against path traversal via a crafted client-supplied
    # filename such as "../../etc/passwd".
    safe_name = os.path.basename(file.filename or "")
    if not safe_name:
        raise HTTPException(status_code=400, detail="Missing filename")

    # The data directory may not exist yet on a fresh checkout.
    os.makedirs("data", exist_ok=True)
    file_location = f"data/{safe_name}"
    try:
        with open(file_location, "wb+") as buffer:
            shutil.copyfileobj(file.file, buffer)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Could not save file: {str(e)}")

    # Ingest into the vector store; remove the orphaned file on failure so a
    # retry of the same upload starts clean.
    try:
        ingest_document(file_location)
    except Exception as e:
        if os.path.exists(file_location):
            os.remove(file_location)
        raise HTTPException(status_code=500, detail=f"Ingestion failed: {str(e)}")

    # Record the source in the relational DB only after ingestion succeeded.
    new_source = Source(filename=safe_name, type="local", file_path=file_location, is_active=True)
    db.add(new_source)
    db.commit()
    db.refresh(new_source)

    return {"message": "File uploaded and ingested successfully", "id": new_source.id}
64
+
65
@app.get("/schedule/{date}", response_model=List[ScheduleItem])
def get_schedule(date: str, db: Session = Depends(get_db)):
    """Return every schedule row for the given YYYY-MM-DD date.

    An empty list is returned when nothing is scheduled for that date.
    """
    # .all() already yields [] for no matches, so no special-casing is needed.
    return db.query(Schedule).filter(Schedule.date == date).all()
73
+
74
@app.post("/unlock_topic", response_model=UnlockResponse)
def unlock_topic(request: UnlockRequest, db: Session = Depends(get_db)):
    """Record a quiz score for a scheduled topic and, if the score passes,
    mark the topic complete and unlock the next topic in the schedule.

    Raises:
        HTTPException: 404 if topic_id does not match a Schedule row.
    """
    # topic_id refers to Schedule.id; Mastery rows are linked by topic name.
    current_topic = db.query(Schedule).filter(Schedule.id == request.topic_id).first()
    if not current_topic:
        raise HTTPException(status_code=404, detail="Topic not found")

    # Upsert the Mastery record for this topic name with the latest score.
    mastery = db.query(Mastery).filter(Mastery.topic_name == current_topic.topic_name).first()
    if not mastery:
        mastery = Mastery(topic_name=current_topic.topic_name, quiz_score=request.quiz_score)
        db.add(mastery)
    else:
        mastery.quiz_score = request.quiz_score

    # Passing threshold: strictly greater than 60.
    if request.quiz_score > 60:
        current_topic.is_completed = True

        # Unlock the next topic, treating Schedule ids as sequential order.
        next_topic = (
            db.query(Schedule)
            .filter(Schedule.id > current_topic.id)
            .order_by(Schedule.id.asc())
            .first()
        )
        next_unlocked = False
        if next_topic:
            next_topic.is_locked = False
            next_unlocked = True

        db.commit()
        # Fix: only claim a topic was unlocked when a next topic actually exists.
        if next_unlocked:
            message = "Quiz passed. Next topic unlocked."
        else:
            message = "Quiz passed. No further topics to unlock."
        return {"success": True, "message": message, "next_topic_unlocked": next_unlocked}
    else:
        db.commit()
        return {"success": True, "message": "Quiz score too low to unlock next topic.", "next_topic_unlocked": False}
112
+
113
@app.get("/query")
def query_kb(question: str):
    """Answer a free-form question against the ingested documents via the RAG engine."""
    result = query_knowledge_base(question)
    return result
backend/rag_engine.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.llms import Ollama

# Directory where Chroma persists the vector index between process runs.
CACHE_DIR = "./chroma_db"
9
+
10
def ingest_document(file_path: str):
    """
    Ingest a PDF document into the persistent Chroma vector database.

    Args:
        file_path: Path to a PDF file on disk.

    Returns:
        The number of chunks stored (0 if the PDF produced no extractable text).

    Raises:
        FileNotFoundError: If file_path does not exist.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    # Load PDF pages as documents.
    loader = PyPDFLoader(file_path)
    docs = loader.load()

    # Split into overlapping chunks so retrieval returns focused context.
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = splitter.split_documents(docs)

    # Guard: a scanned or empty PDF can yield zero chunks, and Chroma rejects
    # an empty batch — skip storage instead of erroring.
    if not splits:
        print(f"No text extracted from {file_path}; nothing ingested")
        return 0

    # Store in ChromaDB.
    # Note: Chroma will automatically persist to disk in newer versions when
    # persist_directory is set.
    Chroma.from_documents(
        documents=splits,
        embedding=OllamaEmbeddings(model="nomic-embed-text"),
        persist_directory=CACHE_DIR,
    )
    print(f"Ingested {len(splits)} chunks from {file_path}")
    return len(splits)
33
+
34
def query_knowledge_base(question: str):
    """
    Query the knowledge base and return an answer together with its sources.
    """
    # Reopen the persisted index with the same embedding model used at ingest.
    embeddings = OllamaEmbeddings(model="nomic-embed-text")
    vector_store = Chroma(persist_directory=CACHE_DIR, embedding_function=embeddings)

    # Retrieve the top 3 most similar chunks.
    matches = vector_store.similarity_search(question, k=3)
    if not matches:
        return {"answer": "No relevant information found.", "sources": []}

    # Build the context block handed to the LLM, one entry per retrieved chunk.
    chunk_blocks = []
    for doc in matches:
        chunk_blocks.append(
            f"Source: {doc.metadata.get('source', 'Unknown')}, Page: {doc.metadata.get('page', 'Unknown')}\nContent: {doc.page_content}"
        )
    context_str = "\n\n".join(chunk_blocks)

    # Generate the answer with a local Ollama model.
    llm = Ollama(model="llama3.2:1b")
    prompt = f"""You are an intelligent study assistant.
Answer the question using the provided context, but explain it in your own words.
Make it sound natural and easy to understand, like a teacher explaining to a student.

Context:
{context_str}

Question: {question}
"""
    response = llm.invoke(prompt)

    # Citation metadata returned alongside the answer.
    sources = [
        {
            "source": os.path.basename(doc.metadata.get('source', '')),
            "page": doc.metadata.get('page', 0)
        }
        for doc in matches
    ]

    return {
        "answer": response,
        "sources": sources
    }
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ sqlalchemy
4
+ chromadb
5
+ langchain
+ langchain-text-splitters
6
+ langchain-community
7
+ langchain-chroma
8
+ pypdf
9
+ python-multipart
10
+ requests