import os
import sys  # Used to write error diagnostics to stderr.

import gradio as gr
import PyPDF2
from openai import OpenAI

# --- Configuration & Client Initialization ---
# Fail fast at startup: a missing secret gives a clear error instead of a
# confusing auth failure on the first request.
NEBIUS_API_KEY = os.getenv("NEBIUS_API_KEY")
if not NEBIUS_API_KEY:
    raise ValueError("API Key not found. Please set the NEBIUS_API_KEY secret in your Hugging Face Space settings.")

# OpenAI-compatible client pointed at the Nebius Studio endpoint.
# Ensure this base_url is correct and publicly accessible.
client = OpenAI(
    base_url="https://api.studio.nebius.com/v1/",
    api_key=NEBIUS_API_KEY,
)

# Character cap applied to the extracted PDF text so the prompt stays within
# the model's context window.
MAX_CONTEXT_CHARS = 16000


# --- Core Functions ---

def extract_text_from_pdf(pdf_file):
    """Extract text from an uploaded PDF file object.

    Args:
        pdf_file: The temporary file object Gradio provides for an upload
            (``.name`` holds the path on disk), or a falsy value.

    Returns:
        The concatenated text of every page EXCEPT the first one (the first
        page is deliberately skipped — presumably a cover sheet; confirm
        against the expected documents), each page followed by a newline.
        Returns "" when no file was given or extraction fails.
    """
    if not pdf_file:
        return ""
    try:
        reader = PyPDF2.PdfReader(pdf_file.name)
        page_texts = []
        for i, page in enumerate(reader.pages):
            if i == 0:
                # Skip the first page by design.
                continue
            page_text = page.extract_text()
            if page_text:
                page_texts.append(page_text)
        # Join once instead of quadratic `text += ...` concatenation.
        return "".join(t + "\n" for t in page_texts)
    except Exception as e:
        # Best-effort: a corrupt/encrypted PDF yields "" rather than a crash;
        # the caller reports a friendly status message.
        print(f"Error reading PDF: {e}", file=sys.stderr)
        return ""


def get_llm_answer(pdf_text, question, history):
    """Send the document context, conversation history, and question to the
    LLM and return the answer.

    Args:
        pdf_text: Full text extracted from the uploaded PDF.
        question: The user's new question.
        history: Prior turns as ``{"role": ..., "content": ...}`` dicts.

    Returns:
        The model's reply string, or a user-friendly error message if the
        API call fails.
    """
    # Truncate the PDF text to avoid exceeding the model's context limit.
    context = pdf_text[:MAX_CONTEXT_CHARS]

    # The system prompt guides the model's behavior.
    system_prompt = '''You are a helpful assistant who specializes in body composition, diet, and exercise. Answer questions based on the provided document. Encourage the user to seek a professional if they have serious concerns whenever appropriate.'''

    # Build the payload in conversational order: system -> document context
    # -> prior turns -> the new question. (The original code appended the
    # question twice — once before the history and once after — which both
    # duplicated it and put the history in the wrong position.)
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"Use the following document to answer my question:\n\n{context}"},
    ]
    if history:
        for msg in history:
            if msg["role"] in ("user", "assistant"):
                messages.append(msg)
    # The new user question goes last, exactly once.
    messages.append({"role": "user", "content": question})

    try:
        response = client.chat.completions.create(
            model="meta-llama/Meta-Llama-3.1-70B-Instruct",
            temperature=0.6,
            top_p=0.95,
            messages=messages,
        )
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error calling OpenAI API: {e}", file=sys.stderr)
        # Return a user-friendly error message instead of raising into Gradio.
        return "Sorry, I encountered an error while trying to generate a response. Please check the logs."


# --- Gradio Interface Logic ---

class PDFChatbot:
    """Holds the extracted PDF text between the upload step and chat turns.

    NOTE(review): this instance is module-level, so its state is shared by
    every concurrent user of the app — fine for a single-user Space, worth
    moving into gr.State for multi-user deployments.
    """

    def __init__(self):
        # Extracted document text; None until a PDF has been processed.
        self.pdf_text = None
        # Basename of the processed file, for status messages.
        self.pdf_filename = None

    def upload_pdf(self, pdf_file):
        """Process an uploaded PDF and return a status string for the UI."""
        if pdf_file is None:
            return "Status: No PDF uploaded."
        self.pdf_text = extract_text_from_pdf(pdf_file)
        self.pdf_filename = os.path.basename(pdf_file.name)
        if not self.pdf_text:
            return f"Status: Could not extract text from {self.pdf_filename}. It might be empty, scanned, or protected."
        return f"Status: Successfully processed {self.pdf_filename}. You can now ask questions."

    def chat(self, user_message, history):
        """Handle one chat turn.

        Returns:
            A 2-tuple of ("" to clear the input textbox, updated history in
            Gradio "messages" format).
        """
        history = list(history) if history else []
        if self.pdf_text is None:
            # Show the instruction inside the chat window. (Previously the
            # message was returned as the first output, which is wired to the
            # input textbox and clobbered whatever the user had typed.)
            history.append({"role": "assistant", "content": "Please upload a PDF document first."})
            return "", history

        # Only well-formed user/assistant turns are forwarded to the model.
        context_history = [msg for msg in history if msg["role"] in ("user", "assistant")]
        answer = get_llm_answer(self.pdf_text, user_message, context_history)

        # Append this turn so the chatbot widget renders it.
        history += [
            {"role": "user", "content": user_message},
            {"role": "assistant", "content": answer},
        ]
        # Empty string clears the input textbox.
        return "", history


# Instantiate the bot.
pdf_bot = PDFChatbot()

# Build the Gradio UI.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Body Composition Agent\nUpload a document about your body composition and ask questions about its content.")
    with gr.Row():
        with gr.Column(scale=1):
            pdf_file = gr.File(label="Upload PDF", file_types=[".pdf"])
            upload_btn = gr.Button("Process PDF", variant="primary")
            upload_status = gr.Textbox(label="Status", interactive=False, value="Status: Waiting for PDF...")
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(type="messages", label="Chat History", height=500)
            msg_textbox = gr.Textbox(label="Your Question:", interactive=True, placeholder="Type your question here...")
            # Clear button is useful for starting a new conversation.
            clear_btn = gr.ClearButton([msg_textbox, chatbot], value="Clear Chat")

    # Wire up the event listeners.
    upload_btn.click(
        fn=pdf_bot.upload_pdf,
        inputs=[pdf_file],
        outputs=[upload_status],
    )
    # Submitting with Enter sends the question.
    msg_textbox.submit(
        fn=pdf_bot.chat,
        inputs=[msg_textbox, chatbot],
        outputs=[msg_textbox, chatbot],
    )

# Launch the app.
demo.launch(debug=True)  # debug=True surfaces errors in the console.