# Spaces: Sleeping
# File size: 12,642 Bytes
# 27d159a 2969175 27d159a
"""
Real LLM-based generator using Groq or Google Gemini API.
This ACTUALLY generates responses (unlike SimpleGenerator which just extracts text).
"""
import os
from typing import List, Dict, Optional
import streamlit as st
# Optional provider SDKs: each flag records whether the corresponding package
# could be imported, so the rest of the module can degrade gracefully when a
# provider is not installed.
try:
    from groq import Groq
    GROQ_AVAILABLE = True
except ImportError:
    GROQ_AVAILABLE = False

try:
    import google.generativeai as genai
    GEMINI_AVAILABLE = True
except ImportError:
    GEMINI_AVAILABLE = False
class LLMGenerator:
    """
    LLM-backed response generator using Groq or Google Gemini.

    Builds a source-grounded system prompt plus a user message that embeds the
    retrieved context, then sends both to the configured provider. When no
    usable client is configured, the public methods return a human-readable
    notice instead of raising.
    """

    # Model identifiers sent to each provider.
    GROQ_MODEL = "llama-3.3-70b-versatile"
    GEMINI_MODEL = "gemini-2.5-flash"

    # Shared preamble of every system prompt (grounding + formatting rules).
    _BASE_PROMPT = (
        "You are an expert academic assistant for students, acting like a highly intelligent study buddy. "
        "⚠️ CRITICAL RULE: You MUST ONLY use information from the provided context below. "
        "DO NOT use your training knowledge. DO NOT infer beyond what's explicitly stated. "
        "If the context doesn't contain adequate information to answer the question, you MUST respond: "
        "'I cannot find sufficient information about this in the uploaded documents. Please upload materials covering this topic or rephrase your question.'\n\n"
        "⚠️ GROUNDING REQUIREMENT: Every statement must be traceable to the provided context. "
        "If you cannot find it in the context below, DO NOT answer from general knowledge.\n\n"
        "✨ FORMATTING RULES (NotebookLM Style):\n"
        "- Use clean, hierarchical Markdown (### Headers, **Bold** terms).\n"
        "- Break down long paragraphs into easily readable bullet points.\n"
        "- Be direct and concise. Avoid conversational fluff like 'Certainly!' or 'Here is the answer'.\n"
        "- If applicable to the prompt, always try to extract a **Real-World Example** from the text to aid understanding.\n\n"
    )

    # Task-specific instructions keyed by use case; unknown use cases add none.
    _TASK_PROMPTS = {
        "explanation": (
            "**Your task:** Explain the concept in a clear, step-by-step manner suitable for students.\n"
            "1. Start with a concise, one-sentence definition.\n"
            "2. Break down the core mechanics or components using bullet points.\n"
            "3. Provide an example (only if found in the text).\n"
            "4. Add a 'Key Takeaway' at the end.\n"
        ),
        "summary": (
            "**Your task:** Create a highly structured summary.\n"
            "- Start with a brief high-level overview (2 sentences max).\n"
            "- Use '### Key Themes' and list the main points as bulleted items.\n"
            "- Keep each point concise but factually dense.\n"
        ),
        "qa": (
            "**Your task:** Answer the question directly and comprehensively.\n"
            "- Provide the direct answer immediately in the first sentence.\n"
            "- Use numbered lists or bullet points to provide supporting details from the context.\n"
            "- Use **bold** for key facts, numbers, and formulas.\n"
        ),
        "notes": (
            "**Your task:** Create comprehensive, structured study notes.\n"
            "- Use clear section headers (###).\n"
            "- Organize information hierarchically (using nested bullet points).\n"
            "- Explicitly highlight **Definitions**, **Formulas**, and **Important Dates/Names**.\n"
        ),
    }

    # Citation rules appended to every system prompt.
    _CITATION_RULES = (
        "\n**Citation Rules:**\n"
        "- You MUST cite your source at the end of every major claim or paragraph using numbered brackets like **[1]**, **[2]** based on the Source number provided in the context.\n"
        "- If a claim comes from multiple sources, use **[1, 2]**.\n"
        "- Do NOT use the document filename in the citation, ONLY the number.\n"
        "- Do NOT make up information - stick strictly to the provided context.\n"
    )

    def __init__(self, provider: str = "groq", api_key: Optional[str] = None):
        """
        Initialize LLM generator.

        Args:
            provider: "groq" or "gemini"
            api_key: API key. When falsy, GROQ_API_KEY / GEMINI_API_KEY is read
                from the environment for the matching provider; any other
                provider gets an empty key.
        """
        self.provider = provider
        self.client = None
        self.ready = False

        if api_key:
            self.api_key = api_key
        elif provider == "groq":
            self.api_key = os.getenv("GROQ_API_KEY", "")
        elif provider == "gemini":
            self.api_key = os.getenv("GEMINI_API_KEY", "")
        else:
            self.api_key = ""

        self._initialize_client()

    def _initialize_client(self):
        """
        (Re)create the provider client from the current API key.

        Previous state is always cleared first, so an empty key, a missing
        SDK or a failed initialization never leaves a stale client marked
        as ready.
        """
        self.client = None
        self.ready = False

        if not self.api_key:
            return

        try:
            if self.provider == "groq" and GROQ_AVAILABLE:
                # Pass the key straight to the client rather than exporting it
                # through os.environ: the environment is process-wide, so a
                # key written there would be visible to every other session.
                self.client = Groq(api_key=self.api_key)
                self.ready = True
            elif self.provider == "gemini" and GEMINI_AVAILABLE:
                genai.configure(api_key=self.api_key)
                self.client = genai.GenerativeModel(self.GEMINI_MODEL)
                self.ready = True
        except Exception as e:
            print(f"Failed to initialize {self.provider}: {e}")
            self.client = None
            self.ready = False

    def set_api_key(self, api_key: str):
        """Update API key and reinitialize the client."""
        self.api_key = api_key
        self._initialize_client()

    def generate_response(
        self,
        prompt: str,
        context: str = "",
        use_case: str = "explanation",
        metadatas: Optional[List[Dict]] = None,
        temperature: float = 0.7,
        max_tokens: int = 1500,
        **kwargs
    ) -> str:
        """
        Generate a context-grounded response with the configured LLM.

        Args:
            prompt: User's question
            context: Retrieved context from documents
            use_case: Response type (explanation, summary, qa, notes)
            metadatas: Metadata dicts; their 'filename' values are listed as
                available sources in the user message
            temperature: LLM temperature
            max_tokens: Maximum response length
            **kwargs: Accepted for call-site compatibility; ignored

        Returns:
            The generated text, or a notice string when the LLM is not
            configured, the context is empty/blank, the provider is unknown,
            or the provider call raised.
        """
        if not self.ready:
            return (
                "⚠️ **LLM not configured.** Please add your API key in the sidebar.\n\n"
                "Get a free key:\n"
                "- **Groq** (recommended, very fast): https://console.groq.com/keys\n"
                "- **Gemini** (Google): https://makersuite.google.com/app/apikey"
            )

        # Whitespace-only context carries no information either, so it is
        # treated the same as a missing context.
        if not context or not context.strip():
            return (
                "I don't have enough information from your uploaded documents to answer this question. "
                "Please upload relevant study materials first."
            )

        system_prompt = self._build_system_prompt(use_case)
        user_message = self._build_user_message(prompt, context, metadatas)

        try:
            return self._call_provider(system_prompt, user_message, temperature, max_tokens)
        except Exception as e:
            return f"Error generating response: {str(e)}\n\nPlease check your API key and try again."

    def _call_provider(self, system_prompt: str, user_message: str, temperature: float, max_tokens: int) -> str:
        """Route one request to the active provider; unknown providers yield an error string."""
        if self.provider == "groq":
            return self._generate_groq(system_prompt, user_message, temperature, max_tokens)
        if self.provider == "gemini":
            return self._generate_gemini(system_prompt, user_message, temperature, max_tokens)
        return "Error: Unknown provider"

    def _build_system_prompt(self, use_case: str) -> str:
        """
        Build the system prompt for a use case.

        The result is the shared preamble, then the task instructions for
        `use_case` (nothing for an unrecognized value), then the citation
        rules.
        """
        return self._BASE_PROMPT + self._TASK_PROMPTS.get(use_case, "") + self._CITATION_RULES

    def _build_user_message(self, prompt: str, context: str, metadatas: Optional[List[Dict]] = None) -> str:
        """
        Build the user message: source list, delimited context, question.

        Source names are the unique 'filename' values from `metadatas`, in
        first-seen order, with '.pdf', '.docx' and '.txt' removed; at most
        five are listed. A missing or empty filename is shown as 'Unknown'.
        """
        sources = []
        for meta in metadatas or []:
            # `or` also covers an explicit None / empty filename, which
            # would otherwise break the .replace() chain below.
            filename = str(meta.get('filename') or 'Unknown')
            clean_name = filename.replace('.pdf', '').replace('.docx', '').replace('.txt', '')
            if clean_name not in sources:
                sources.append(clean_name)

        parts = ["**Available Sources (USE ONLY THESE):**\n"]
        parts.extend(f"- {source}\n" for source in sources[:5])  # Show up to 5 sources
        parts.append(f"\n**===== START OF CONTEXT (ANSWER ONLY FROM THIS) =====**\n\n{context}\n\n")
        parts.append("**===== END OF CONTEXT =====**\n\n")
        parts.append(f"**Student's Question:** {prompt}\n\n")
        parts.append(
            "**Instructions:** Answer ONLY using the context between the markers above. "
            "If the context doesn't contain the answer, say you don't have that information. "
            "Cite sources in brackets."
        )
        return "".join(parts)

    def _generate_groq(self, system_prompt: str, user_message: str, temperature: float, max_tokens: int) -> str:
        """Send a system + user message pair to the Groq chat completions API and return the reply text."""
        completion = self.client.chat.completions.create(
            model=self.GROQ_MODEL,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_message}
            ],
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=0.95,
            stream=False
        )
        return completion.choices[0].message.content

    def _generate_gemini(self, system_prompt: str, user_message: str, temperature: float, max_tokens: int) -> str:
        """Send the system prompt and user message to Gemini as one combined prompt and return the reply text."""
        full_prompt = f"{system_prompt}\n\n{user_message}"
        response = self.client.generate_content(
            full_prompt,
            generation_config=genai.GenerationConfig(
                temperature=temperature,
                max_output_tokens=max_tokens,
                top_p=0.95
            )
        )
        return response.text

    def is_ready(self) -> bool:
        """Check if LLM is ready to generate."""
        return self.ready

    def get_provider(self) -> str:
        """Get a display name for the current provider."""
        if self.provider == "groq":
            return "Groq (Llama-3.3-70B)"
        elif self.provider == "gemini":
            return "Google Gemini 2.5 Flash"
        return "Unknown"

    def generate(self, prompt: str, temperature: float = 0.3, max_tokens: int = 1500) -> str:
        """
        Simple wrapper for backend compatibility.

        Generates a response from a complete prompt that already includes its
        context, using a generic assistant system prompt.

        Args:
            prompt: Complete prompt with context already embedded
            temperature: LLM temperature
            max_tokens: Maximum response length

        Returns:
            The generated text, or a notice string when the LLM is not
            configured, the provider is unknown, or the provider call raised.
        """
        if not self.ready:
            return (
                "⚠️ **LLM not configured.** Please add your API key.\n\n"
                "Get a free key:\n"
                "- **Groq** (recommended, very fast): https://console.groq.com/keys\n"
                "- **Gemini** (Google): https://makersuite.google.com/app/apikey"
            )

        try:
            # Shared dispatch guarantees a string even for an unknown
            # provider instead of silently falling through to None.
            return self._call_provider(
                "You are a helpful AI assistant.",
                prompt,
                temperature,
                max_tokens,
            )
        except Exception as e:
            return f"Error generating response: {str(e)}"
|