# Body Composition Agent — Gradio app for a Hugging Face Space.
import os
import sys

import gradio as gr
import PyPDF2
from openai import OpenAI

# --- Configuration & Client Initialization ---

# Fail fast with a clear message when the secret isn't configured in the
# Hugging Face Space settings.
NEBIUS_API_KEY = os.getenv("NEBIUS_API_KEY")
if not NEBIUS_API_KEY:
    raise ValueError("API Key not found. Please set the NEBIUS_API_KEY secret in your Hugging Face Space settings.")

# OpenAI-compatible client pointed at the Nebius AI Studio endpoint.
# Ensure this base_url is correct and publicly accessible.
client = OpenAI(
    base_url="https://api.studio.nebius.com/v1/",
    api_key=NEBIUS_API_KEY,
)
| # --- Core Functions --- | |
def extract_text_from_pdf(pdf_file):
    """Extract text from an uploaded PDF file object.

    Args:
        pdf_file: Temporary file object handed over by Gradio's File
            component (its ``.name`` attribute is the path on disk).

    Returns:
        The concatenated text of every page EXCEPT the first (the cover
        page is deliberately skipped), one newline after each non-empty
        page. Returns "" when no file was given or the PDF can't be read.
    """
    if not pdf_file:
        return ""
    try:
        reader = PyPDF2.PdfReader(pdf_file.name)
        # Skip the first page; keep only pages that yielded some text.
        page_texts = (page.extract_text() for page in reader.pages[1:])
        return "".join(f"{chunk}\n" for chunk in page_texts if chunk)
    except Exception as e:
        # Best-effort: log to stderr and report "no text" to the caller.
        print(f"Error reading PDF: {e}", file=sys.stderr)
        return ""
def get_llm_answer(pdf_text, question, history):
    """Send the document context, chat history, and question to the LLM.

    Args:
        pdf_text: Full text extracted from the uploaded PDF.
        question: The user's current question.
        history: Prior turns as ``{"role", "content"}`` dicts, or None.

    Returns:
        The assistant's reply, or a user-friendly error string if the
        API call fails.
    """
    # Truncate the PDF text to avoid exceeding the model's context limit.
    context = pdf_text[:16000]

    # The system prompt guides the model's behavior.
    system_prompt = '''You are a helpful assistant who specializes in body composition, diet, and exercise.
Answer questions based on the provided document. Encourage the user to seek a professional
if they have serious concerns whenever appropriate.'''

    # Build the payload in conversational order: system instructions,
    # document context, prior turns, then the new question exactly once.
    # (Previously the question was sent twice — once before and once after
    # the history — which scrambled the conversation order.)
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"Use the following document to answer my question:\n\n{context}"},
    ]
    if history:
        messages.extend(msg for msg in history if msg["role"] in ["user", "assistant"])
    messages.append({"role": "user", "content": f"Question: {question}"})

    try:
        response = client.chat.completions.create(
            model="meta-llama/Meta-Llama-3.1-70B-Instruct",
            temperature=0.6,
            top_p=0.95,
            messages=messages,
        )
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error calling OpenAI API: {e}", file=sys.stderr)
        # Return a user-friendly error message.
        return "Sorry, I encountered an error while trying to generate a response. Please check the logs."
| # --- Gradio Interface Logic --- | |
| # Use a class to manage state (the extracted PDF text). | |
class PDFChatbot:
    """Per-session state holder: the extracted PDF text and its filename."""

    def __init__(self):
        # Text of the most recently processed PDF; None until a successful
        # upload, which `chat` uses as the "no document yet" sentinel.
        self.pdf_text = None
        self.pdf_filename = None

    def upload_pdf(self, pdf_file):
        """Extract text from the uploaded PDF and return a status string."""
        if pdf_file is None:
            return "Status: No PDF uploaded."
        self.pdf_text = extract_text_from_pdf(pdf_file)
        self.pdf_filename = os.path.basename(pdf_file.name)
        if not self.pdf_text:
            return f"Status: Could not extract text from {self.pdf_filename}. It might be empty, scanned, or protected."
        return f"Status: Successfully processed {self.pdf_filename}. You can now ask questions."

    def chat(self, user_message, history):
        """Handle one chat turn.

        Returns:
            A ``(textbox_value, history)`` pair: "" clears the input
            textbox, and the history gains the user's message plus the
            assistant's reply (messages-format dicts).
        """
        # Normalize history BEFORE any early return so callers never get
        # None back (the old code guarded too late and could return None).
        if history is None:
            history = []
        if self.pdf_text is None:
            # Surface the hint in the chat window instead of overwriting
            # the user's question in the input textbox.
            return "", history + [
                {"role": "user", "content": user_message},
                {"role": "assistant", "content": "Please upload a PDF document first."},
            ]
        # Only well-formed user/assistant turns are forwarded as context.
        context_history = [msg for msg in history if msg["role"] in ["user", "assistant"]]
        answer = get_llm_answer(self.pdf_text, user_message, context_history)
        # Clear the textbox and show both new turns in the chat window.
        return "", history + [
            {"role": "user", "content": user_message},
            {"role": "assistant", "content": answer},
        ]
# One shared bot instance backs all event handlers.
pdf_bot = PDFChatbot()

# --- Gradio UI ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Body Composition Agent\nUpload a document about your body composition and ask questions about its content.")

    with gr.Row():
        # Left column: PDF upload + processing status.
        with gr.Column(scale=1):
            pdf_file = gr.File(label="Upload PDF", file_types=[".pdf"])
            upload_btn = gr.Button("Process PDF", variant="primary")
            upload_status = gr.Textbox(label="Status", interactive=False, value="Status: Waiting for PDF...")
        # Right column: chat window, question box, and a reset button.
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(type="messages", label="Chat History", height=500)
            msg_textbox = gr.Textbox(label="Your Question:", interactive=True, placeholder="Type your question here...")
            clear_btn = gr.ClearButton([msg_textbox, chatbot], value="Clear Chat")

    # Button click -> extract text and report status.
    upload_btn.click(fn=pdf_bot.upload_pdf, inputs=[pdf_file], outputs=[upload_status])

    # Enter in the textbox -> ask the question; handler clears the box
    # and appends both turns to the chat history.
    msg_textbox.submit(fn=pdf_bot.chat, inputs=[msg_textbox, chatbot], outputs=[msg_textbox, chatbot])

demo.launch(debug=True)  # debug=True surfaces errors in the console.