Spaces:
Sleeping
Sleeping
| from pymongo import MongoClient | |
| from datetime import datetime | |
| import openai | |
| import google.generativeai as genai | |
| import streamlit as st | |
| from db import courses_collection2, faculty_collection, students_collection, vectors_collection | |
| from PIL import Image | |
| import PyPDF2, docx, io | |
| from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Document | |
| from bson import ObjectId | |
| from dotenv import load_dotenv | |
| import os | |
| from create_course import courses_collection | |
| load_dotenv() | |
| MONGO_URI = os.getenv('MONGO_URI') | |
| OPENAI_KEY = os.getenv('OPENAI_KEY') | |
| GEMINI_KEY = os.getenv('GEMINI_KEY') | |
| client = MongoClient(MONGO_URI) | |
| db = client['novascholar_db'] | |
| resources_collection = db['resources'] | |
| # Configure APIs | |
| openai.api_key = OPENAI_KEY | |
| genai.configure(api_key=GEMINI_KEY) | |
| model = genai.GenerativeModel('gemini-pro') | |
| def upload_resource(course_id, session_id, file_name, file_content, material_type): | |
| # material_data = { | |
| # "session_id": session_id, | |
| # "course_id": course_id, | |
| # "file_name": file_name, | |
| # "file_content": file_content, | |
| # "material_type": material_type, | |
| # "uploaded_at": datetime.utcnow() | |
| # } | |
| # return resources_collection.insert_one(material_data) | |
| # resource_id = ObjectId() | |
| # Extract text content from the file | |
| text_content = extract_text_from_file(file_content) | |
| # Check if a resource with this file name already exists | |
| existing_resource = resources_collection.find_one({ | |
| "session_id": session_id, | |
| "file_name": file_name | |
| }) | |
| if existing_resource: | |
| return existing_resource["_id"] | |
| # Read the file content | |
| file_content.seek(0) # Reset the file pointer to the beginning | |
| original_file_content = file_content.read() | |
| resource_data = { | |
| "_id": ObjectId(), | |
| "course_id": course_id, | |
| "session_id": session_id, | |
| "file_name": file_name, | |
| "file_type": file_content.type, | |
| "text_content": text_content, | |
| "file_content": original_file_content, # Store the original file content | |
| "material_type": material_type, | |
| "uploaded_at": datetime.utcnow() | |
| } | |
| resources_collection.insert_one(resource_data) | |
| resource_id = resource_data["_id"] | |
| courses_collection.update_one( | |
| { | |
| "course_id": course_id, | |
| "sessions.session_id": session_id | |
| }, | |
| { | |
| "$push": {"sessions.$.pre_class.resources": resource_id} | |
| } | |
| ) | |
| # print("End of Upload Resource, Resource ID is: ", resource_id) | |
| # return resource_id | |
| if text_content: | |
| create_vector_store(text_content, resource_id) | |
| return resource_id | |
| def assignment_submit(student_id, course_id, session_id, assignment_id, file_name, file_content, text_content, material_type): | |
| # Read the file content | |
| file_content.seek(0) # Reset the file pointer to the beginning | |
| original_file_content = file_content.read() | |
| assignment_data = { | |
| "student_id": student_id, | |
| "course_id": course_id, | |
| "session_id": session_id, | |
| "assignment_id": assignment_id, | |
| "file_name": file_name, | |
| "file_type": file_content.type, | |
| "file_content": original_file_content, # Store the original file content | |
| "text_content": text_content, | |
| "material_type": material_type, | |
| "submitted_at": datetime.utcnow(), | |
| "file_url": "sample_url" | |
| } | |
| try: | |
| courses_collection2.update_one( | |
| { | |
| "course_id": course_id, | |
| "sessions.session_id": session_id, | |
| "sessions.post_class.assignments.id": assignment_id | |
| }, | |
| { | |
| "$push": {"sessions.$.post_class.assignments.$[assignment].submissions": assignment_data} | |
| }, | |
| array_filters=[{"assignment.id": assignment_id}] | |
| ) | |
| return True | |
| except Exception as db_error: | |
| print(f"Error saving submission: {str(db_error)}") | |
| return False | |
| def extract_text_from_file(uploaded_file): | |
| text = "" | |
| file_type = uploaded_file.type | |
| try: | |
| if file_type == "text/plain": | |
| text = uploaded_file.getvalue().decode("utf-8") | |
| elif file_type == "application/pdf": | |
| pdf_reader = PyPDF2.PdfReader(io.BytesIO(uploaded_file.getvalue())) | |
| for page in pdf_reader.pages: | |
| text += page.extract_text() + "\n" | |
| elif file_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document": | |
| doc = docx.Document(io.BytesIO(uploaded_file.getvalue())) | |
| for para in doc.paragraphs: | |
| text += para.text + "\n" | |
| return text | |
| except Exception as e: | |
| st.error(f"Error processing file: {str(e)}") | |
| return None | |
| def get_embedding(text): | |
| response = openai.embeddings.create( | |
| model="text-embedding-ada-002", | |
| input=text | |
| ) | |
| return response.data[0].embedding | |
| def create_vector_store(text, resource_id): | |
| # resource_object_id = ObjectId(resource_id) | |
| # Ensure resource_id is an ObjectId | |
| # if not isinstance(resource_id, ObjectId): | |
| # resource_id = ObjectId(resource_id) | |
| existing_vector = vectors_collection.find_one({ | |
| "resource_id": resource_id, | |
| "text": text | |
| }) | |
| if existing_vector: | |
| print(f"Vector already exists for Resource ID: {resource_id}") | |
| return | |
| print(f"In Vector Store method, Resource ID is: {resource_id}") | |
| document = Document(text=text) | |
| embedding = get_embedding(text) | |
| vector_data = { | |
| "resource_id": resource_id, | |
| "vector": embedding, | |
| "text": text, | |
| "created_at": datetime.utcnow() | |
| } | |
| vectors_collection.insert_one(vector_data) | |
| # return VectorStoreIndex.from_documents([document]) |