Spaces:
Sleeping
Sleeping
File size: 6,093 Bytes
b809a0b 752bc47 651347d b809a0b 651347d b809a0b 651347d 752bc47 651347d 752bc47 b809a0b 651347d c745c24 651347d 752bc47 c745c24 651347d b809a0b 651347d 752bc47 651347d b809a0b 752bc47 651347d 752bc47 651347d b809a0b 651347d b809a0b 651347d c745c24 651347d c745c24 651347d c745c24 651347d c745c24 651347d 752bc47 651347d c745c24 651347d b809a0b 651347d b809a0b 651347d | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 | import gradio as gr
import PyPDF2
import os
from openai import OpenAI
import sys # Import sys to write to stderr
# --- Configuration & Client Initialization ---
# Fail fast at startup: a clear error here beats a cryptic API failure later.
# The secret must be configured in the Hugging Face Space settings.
NEBIUS_API_KEY = os.getenv("NEBIUS_API_KEY")
if not NEBIUS_API_KEY:
    raise ValueError("API Key not found. Please set the NEBIUS_API_KEY secret in your Hugging Face Space settings.")

# OpenAI-compatible client pointed at the Nebius Studio inference endpoint.
client = OpenAI(
    base_url="https://api.studio.nebius.com/v1/",
    api_key=NEBIUS_API_KEY,
)
# --- Core Functions ---
def extract_text_from_pdf(pdf_file):
    """Extracts text from an uploaded PDF file object.

    `pdf_file` is the temp-file wrapper Gradio passes for an upload; its
    `.name` attribute is the path on disk. Returns "" when nothing could
    be extracted (no file, unreadable PDF, or no text content).
    """
    if not pdf_file:
        return ""
    try:
        reader = PyPDF2.PdfReader(pdf_file.name)
        parts = []
        for index, page in enumerate(reader.pages):
            # The first page is deliberately skipped (e.g. a cover page).
            if index == 0:
                continue
            content = page.extract_text()
            if content:
                parts.append(content + "\n")
        return "".join(parts)
    except Exception as e:
        print(f"Error reading PDF: {e}", file=sys.stderr)
        return ""
def get_llm_answer(pdf_text, question, history):
    """
    Sends the document context, prior conversation, and the new question to
    the LLM and returns the assistant's answer.

    Args:
        pdf_text: Full text extracted from the uploaded PDF.
        question: The user's new question.
        history: Prior chat messages as {"role": ..., "content": ...} dicts
            (may be None or empty).

    Returns:
        The model's answer string, or a user-friendly error message if the
        API call fails.
    """
    # Truncate the PDF text to avoid exceeding the model's context limit.
    context = pdf_text[:16000]

    # The system prompt guides the model's behavior.
    system_prompt = '''You are a helpful assistant who specializes in body composition, diet, and exercise.
Answer questions based on the provided document. Encourage the user to seek a professional
if they have serious concerns whenever appropriate.'''

    # Build the payload in chronological order: system -> document ->
    # prior turns -> new question. (The previous version sent the question
    # twice — once *before* the history — which scrambled the conversation
    # order seen by the model.)
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"Use the following document to answer my question:\n\n{context}"},
    ]
    if history:
        messages.extend(msg for msg in history if msg["role"] in ("user", "assistant"))
    # Add the new user question exactly once, after the history.
    messages.append({"role": "user", "content": f"Question: {question}"})

    try:
        response = client.chat.completions.create(
            model="meta-llama/Meta-Llama-3.1-70B-Instruct",
            temperature=0.6,
            top_p=0.95,
            messages=messages,
        )
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error calling OpenAI API: {e}", file=sys.stderr)
        # Return a user-friendly error message instead of crashing the UI.
        return "Sorry, I encountered an error while trying to generate a response. Please check the logs."
# --- Gradio Interface Logic ---
# Use a class to manage state (the extracted PDF text).
class PDFChatbot:
    """Holds the extracted PDF text between Gradio events."""

    def __init__(self):
        # Text of the currently loaded PDF (None until one is processed).
        self.pdf_text = None
        # Basename of the uploaded file, used in status messages.
        self.pdf_filename = None

    def upload_pdf(self, pdf_file):
        """Extract text from the uploaded file and return a status string."""
        if pdf_file is None:
            return "Status: No PDF uploaded."
        self.pdf_text = extract_text_from_pdf(pdf_file)
        self.pdf_filename = os.path.basename(pdf_file.name)
        if not self.pdf_text:
            return f"Status: Could not extract text from {self.pdf_filename}. It might be empty, scanned, or protected."
        return f"Status: Successfully processed {self.pdf_filename}. You can now ask questions."

    def chat(self, user_message, history):
        """Handle one chat turn; returns ("", updated_history).

        The empty first element clears the input textbox; the second is the
        messages-format history rendered by the Chatbot component.
        """
        if history is None:
            history = []
        # Falsy check (not just `is None`) also covers a PDF whose text
        # extraction failed and returned "".
        if not self.pdf_text:
            # Bug fix: surface the prompt inside the chat window. The old
            # code returned the warning as the *first* output, which Gradio
            # routed into the input textbox, clobbering the user's message.
            history = history + [
                {"role": "user", "content": user_message},
                {"role": "assistant", "content": "Please upload a PDF document first."},
            ]
            return "", history
        # Only forward well-formed turns to the model.
        context_history = [msg for msg in history if msg["role"] in ["user", "assistant"]]
        answer = get_llm_answer(self.pdf_text, user_message, context_history)
        # Append the user message and the assistant's answer to the history.
        history = history + [
            {"role": "user", "content": user_message},
            {"role": "assistant", "content": answer},
        ]
        return "", history
# Single bot instance; it carries the extracted PDF text between events.
pdf_bot = PDFChatbot()

# --- Build the Gradio UI ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Body Composition Agent\nUpload a document about your body composition and ask questions about its content.")

    with gr.Row():
        # Left column: upload controls and a status readout.
        with gr.Column(scale=1):
            pdf_file = gr.File(label="Upload PDF", file_types=[".pdf"])
            upload_btn = gr.Button("Process PDF", variant="primary")
            upload_status = gr.Textbox(label="Status", interactive=False, value="Status: Waiting for PDF...")
        # Right column: the conversation itself.
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(type="messages", label="Chat History", height=500)
            msg_textbox = gr.Textbox(label="Your Question:", interactive=True, placeholder="Type your question here...")
            # Lets the user wipe the conversation and start over.
            clear_btn = gr.ClearButton([msg_textbox, chatbot], value="Clear Chat")

    # Event wiring: clicking the button processes the uploaded PDF...
    upload_btn.click(fn=pdf_bot.upload_pdf, inputs=[pdf_file], outputs=[upload_status])
    # ...and pressing Enter in the textbox submits a question.
    msg_textbox.submit(fn=pdf_bot.chat, inputs=[msg_textbox, chatbot], outputs=[msg_textbox, chatbot])

# Launch the app; debug=True surfaces errors in the console.
demo.launch(debug=True)
|