# Recovered from a Hugging Face Spaces page that reported: "Runtime error".
import os
import streamlit as st
from PyPDF2 import PdfReader
import numpy as np
from groq import Groq
import faiss
import fitz
from io import BytesIO


def get_groq_client():
    """Build a Groq API client from the `groq_api` environment variable.

    Raises:
        ValueError: if the `groq_api` environment variable is unset/empty.
    """
    key = os.getenv("groq_api")
    if key:
        return Groq(api_key=key)
    raise ValueError("Groq API key not found in environment variables.")


# Module-level client shared by all Groq helpers below.
groq_client = get_groq_client()
# Function to extract text from PDF
def extract_pdf_content(uploaded_file):
    """Return the concatenated text of every page in an uploaded PDF.

    `uploaded_file` is a Streamlit UploadedFile; its bytes are wrapped in a
    BytesIO because fitz wants a seekable stream.
    """
    stream = BytesIO(uploaded_file.read())
    document = fitz.open(stream=stream, filetype="pdf")
    pages = [page.get_text() for page in document]
    return "".join(pages)
# Function to split content into chunks
def chunk_text(text, chunk_size=500):
    """Split text into whitespace-delimited chunks of up to chunk_size words."""
    words = text.split()
    chunks = []
    for start in range(0, len(words), chunk_size):
        chunks.append(" ".join(words[start:start + chunk_size]))
    return chunks
# Function to compute embeddings using Groq's Llama3-70B-8192 model
def compute_embeddings(text_chunks):
    """Request an embedding-like vector for each text chunk via the Groq API.

    NOTE(review): llama3-70b-8192 is a *chat* model, not an embedding model.
    Its reply is free text, so parsing it as comma-separated floats will
    normally fail — this is the likely cause of the Space's runtime error.
    A dedicated embedding model/endpoint should be used instead.

    Returns:
        A 2-D float32 NumPy array (FAISS requires float32), one row per chunk.

    Raises:
        ValueError: if a model response cannot be parsed as numbers, or if
            the parsed vectors have inconsistent lengths.
    """
    embeddings = []
    for chunk in text_chunks:
        response = groq_client.chat.completions.create(
            messages=[{"role": "user", "content": chunk}],
            model="llama3-70b-8192",
        )
        text = response.choices[0].message.content
        # np.fromstring(text, sep=",") is deprecated (and silently returns
        # garbage on bad input); parse explicitly and fail loudly instead.
        try:
            vector = np.array([float(tok) for tok in text.split(",")],
                              dtype=np.float32)
        except ValueError:
            raise ValueError(
                "Model response could not be parsed as a numeric embedding; "
                "use a dedicated embedding model instead of a chat model."
            )
        embeddings.append(vector)
    # Ragged rows (different vector lengths) will raise here, which is the
    # desired failure mode before handing the matrix to FAISS.
    return np.array(embeddings, dtype=np.float32)
# Function to build FAISS index
def build_faiss_index(embeddings):
    """Build an exact L2-distance FAISS index over an embedding matrix.

    Args:
        embeddings: 2-D array-like of shape (n_vectors, dimension).

    Returns:
        A populated faiss.IndexFlatL2.
    """
    # FAISS only accepts C-contiguous float32 input; np.array() upstream
    # produces float64 by default, so convert defensively here.
    matrix = np.ascontiguousarray(embeddings, dtype=np.float32)
    index = faiss.IndexFlatL2(matrix.shape[1])  # L2 distance for similarity
    index.add(matrix)
    return index
# Function to search in FAISS index
def search_faiss_index(index, query_embedding, text_chunks, top_k=3):
    """Return the top_k nearest chunks as (chunk_text, distance) pairs."""
    distances, indices = index.search(query_embedding, top_k)
    results = []
    for rank, chunk_idx in enumerate(indices[0]):
        results.append((text_chunks[chunk_idx], distances[0][rank]))
    return results
# Function to generate professional content using Groq's Llama3-70B-8192 model
def generate_professional_content_groq(topic):
    """Ask the Groq llama3-70b-8192 chat model to explain `topic` for EE students."""
    prompt = f"Explain '{topic}' in bullet points, highlighting key concepts, examples, and applications for electrical engineering students."
    completion = groq_client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="llama3-70b-8192",
    )
    # Strip leading/trailing whitespace from the model's answer.
    return completion.choices[0].message.content.strip()
# Function to compute query embedding using Groq's Llama3-70B-8192 model
def compute_query_embedding(query):
    """Return a (1, dim) float32 query vector parsed from the Groq response.

    NOTE(review): like compute_embeddings, this asks a chat model for an
    embedding, which it cannot reliably produce — replace with a real
    embedding endpoint.

    Raises:
        ValueError: if the response cannot be parsed as comma-separated floats.
    """
    response = groq_client.chat.completions.create(
        messages=[{"role": "user", "content": query}],
        model="llama3-70b-8192",
    )
    text = response.choices[0].message.content
    # np.fromstring(..., sep=",") is deprecated; parse explicitly and fail
    # loudly instead of silently producing an empty array.
    try:
        vector = np.array([float(tok) for tok in text.split(",")],
                          dtype=np.float32)
    except ValueError:
        raise ValueError(
            "Model response could not be parsed as a numeric query embedding; "
            "use a dedicated embedding model instead of a chat model."
        )
    # Reshape to (1, dim) because FAISS expects a batch of query vectors.
    return vector.reshape(1, -1)
# Streamlit app
st.title("Generative AI for Electrical Engineering Education with FAISS and Groq")
st.sidebar.header("AI-Based Tutor with Vector Search")

# File upload section
uploaded_file = st.sidebar.file_uploader("Upload Study Material (PDF)", type=["pdf"])
topic = st.sidebar.text_input("Enter a topic (e.g., Newton's Third Law)")

# BUGFIX: `index` and `chunks` were previously only bound inside the upload
# try-block, so a processing failure (or any code path reaching the search
# below without a successful upload) raised NameError. Initialize sentinels
# first; Streamlit reruns this whole script on every widget interaction.
index = None
chunks = []

if uploaded_file:
    try:
        # Extract and process file content
        content = extract_pdf_content(uploaded_file)
        st.sidebar.success(f"{uploaded_file.name} uploaded successfully!")
        # Chunk and compute embeddings
        chunks = chunk_text(content)
        embeddings = compute_embeddings(chunks)
        # Build FAISS index
        index = build_faiss_index(embeddings)
        st.write("**File Processed and Indexed for Search**")
        st.write(f"Total chunks created: {len(chunks)}")
    except Exception as e:
        # Surface the error in the UI rather than crashing the app.
        st.error(f"Error processing file: {e}")

# Generate study material
if st.button("Generate Study Material"):
    if topic:
        try:
            st.header(f"Study Material: {topic}")
            # Compute query embedding
            query_embedding = compute_query_embedding(topic)
            # Search FAISS index only when indexing actually succeeded.
            if index is not None:
                results = search_faiss_index(index, query_embedding, chunks, top_k=3)
                st.write("**Relevant Content from Uploaded File:**")
                for result, distance in results:
                    st.write(f"- {result} (Similarity: {distance:.2f})")
            else:
                st.warning("No file uploaded. Generating AI-based content instead.")
            # Generate content using Groq's Llama3-70B-8192 model
            ai_content = generate_professional_content_groq(topic)
            st.write("**AI-Generated Content (Groq - Llama3-70B-8192):**")
            st.write(ai_content)
        except Exception as e:
            st.error(f"Error generating content: {e}")
    else:
        st.warning("Please enter a topic!")