File size: 6,093 Bytes
b809a0b
752bc47
 
 
651347d
b809a0b
651347d
 
 
 
 
 
 
b809a0b
651347d
 
752bc47
 
651347d
752bc47
b809a0b
651347d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c745c24
 
 
 
651347d
 
752bc47
c745c24
 
 
 
651347d
b809a0b
651347d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
752bc47
 
 
651347d
b809a0b
752bc47
 
651347d
 
752bc47
651347d
b809a0b
651347d
 
 
 
b809a0b
651347d
c745c24
651347d
c745c24
 
 
 
 
 
651347d
c745c24
651347d
 
c745c24
 
 
 
651347d
 
 
 
 
 
752bc47
651347d
 
 
 
 
 
 
 
 
 
 
c745c24
651347d
 
 
b809a0b
651347d
 
 
 
 
 
 
 
 
 
 
 
 
b809a0b
651347d
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
import gradio as gr
import PyPDF2
import os
from openai import OpenAI
import sys # Import sys to write to stderr

# --- Configuration & Client Initialization ---
# Read the API key at import time so a missing secret fails fast with a clear
# message instead of surfacing later as an opaque 401 from the API.
NEBIUS_API_KEY = os.getenv("NEBIUS_API_KEY")
if not NEBIUS_API_KEY:
    # Raised at startup: on Hugging Face Spaces this points the operator at
    # the Settings -> Secrets page rather than a runtime traceback.
    raise ValueError("API Key not found. Please set the NEBIUS_API_KEY secret in your Hugging Face Space settings.")

# OpenAI-compatible client pointed at the Nebius AI Studio endpoint.
# NOTE(review): constructed at module import — any process importing this
# module needs the secret set, even if it never makes a request.
client = OpenAI(
    base_url="https://api.studio.nebius.com/v1/",
    api_key=NEBIUS_API_KEY
)

# --- Core Functions ---

def extract_text_from_pdf(pdf_file):
    """Return the text of an uploaded PDF, one newline after each page.

    ``pdf_file`` is the temporary file object Gradio provides (read via its
    ``.name`` path). A falsy argument yields ``""``. The first page is
    deliberately excluded, and pages with no extractable text (e.g. scanned
    images) contribute nothing. Any read/parse failure is logged to stderr
    and reported as an empty string rather than raised.
    """
    if not pdf_file:
        return ""
    try:
        reader = PyPDF2.PdfReader(pdf_file.name)
        collected = []
        for index, page in enumerate(reader.pages):
            if index == 0:
                # Page 0 intentionally skipped — presumably a cover/title
                # page with no useful content; confirm against the expected
                # report format.
                continue
            content = page.extract_text()
            if content:
                collected.append(content + "\n")
        return "".join(collected)
    except Exception as e:
        print(f"Error reading PDF: {e}", file=sys.stderr)
        return ""


def get_llm_answer(pdf_text, question, history):
    """Ask the LLM to answer *question* using *pdf_text* as context.

    Parameters
    ----------
    pdf_text : str
        Text extracted from the uploaded PDF; truncated to the first
        16,000 characters to stay within the model's context window.
    question : str
        The user's new question.
    history : list[dict] | None
        Prior turns as ``{"role": ..., "content": ...}`` dicts; entries
        whose role is not ``user``/``assistant`` are dropped.

    Returns
    -------
    str
        The assistant's reply, or a user-friendly error string if the API
        call fails (the exception is logged to stderr).
    """
    # Truncate the PDF text to avoid exceeding the model's context limit.
    context = pdf_text[:16000]

    # The system prompt guides the model's behavior.
    system_prompt = '''You are a helpful assistant who specializes in body composition, diet, and exercise.
    Answer questions based on the provided document. Encourage the user to seek a professional
    if they have serious concerns whenever appropriate.'''

    # Order matters: system prompt, document context, prior turns, then the
    # new question exactly ONCE. (Previously the question was sent twice and
    # the history was appended after the first copy, which presented the
    # model with a scrambled conversation.)
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"Use the following document to answer my question:\n\n{context}"},
    ]
    if history:
        messages.extend(
            msg for msg in history if msg["role"] in ("user", "assistant")
        )
    messages.append({"role": "user", "content": f"Question: {question}"})

    try:
        response = client.chat.completions.create(
            model="meta-llama/Meta-Llama-3.1-70B-Instruct",
            temperature=0.6,
            top_p=0.95,
            messages=messages
        )
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error calling OpenAI API: {e}", file=sys.stderr)
        # Return a user-friendly error message instead of raising into the UI.
        return "Sorry, I encountered an error while trying to generate a response. Please check the logs."


# --- Gradio Interface Logic ---

# Use a class to manage state (the extracted PDF text).
class PDFChatbot:
    """Per-app state holder: the extracted text of the currently loaded PDF."""

    def __init__(self):
        # Raw text of the uploaded PDF; None until a PDF is processed.
        self.pdf_text = None
        # Basename of the uploaded file, used only in status messages.
        self.pdf_filename = None

    def upload_pdf(self, pdf_file):
        """Extract text from *pdf_file* and return a status string for the UI.

        *pdf_file* is the Gradio temp-file object (or None if nothing was
        uploaded). Side effect: stores the text and filename on the instance.
        """
        if pdf_file is None:
            return "Status: No PDF uploaded."

        self.pdf_text = extract_text_from_pdf(pdf_file)
        self.pdf_filename = os.path.basename(pdf_file.name)

        if not self.pdf_text:
            return f"Status: Could not extract text from {self.pdf_filename}. It might be empty, scanned, or protected."

        return f"Status: Successfully processed {self.pdf_filename}. You can now ask questions."

    def chat(self, user_message, history):
        """Handle one chat turn.

        Returns ``("", updated_history)`` — the empty string clears the
        input textbox, and *updated_history* is a new list in the Gradio
        "messages" format (never None).
        """
        # Normalize BEFORE any early return so callers never get None back.
        # (Previously the no-PDF branch returned the un-normalized history.)
        if history is None:
            history = []

        if self.pdf_text is None:
            # Surface the instruction inside the chat window instead of
            # overwriting the user's input box, keeping their message visible.
            history = history + [
                {"role": "user", "content": user_message},
                {"role": "assistant", "content": "Please upload a PDF document first."},
            ]
            return "", history

        # Only well-formed user/assistant turns are forwarded as context.
        context_history = [msg for msg in history if msg["role"] in ["user", "assistant"]]
        answer = get_llm_answer(self.pdf_text, user_message, context_history)

        # Append the new exchange without mutating the caller's list.
        history = history + [
            {"role": "user", "content": user_message},
            {"role": "assistant", "content": answer},
        ]

        return "", history

# Single shared bot instance: all browser sessions share the same PDF state.
# NOTE(review): on a multi-user Space, one user's upload replaces another's —
# per-session state (gr.State) would be needed to isolate users.
pdf_bot = PDFChatbot()

# Build the Gradio UI: file upload + status on the left, chat on the right.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Body Composition Agent\nUpload a document about your body composition and ask questions about its content.")
    
    with gr.Row():
        with gr.Column(scale=1):
            pdf_file = gr.File(label="Upload PDF", file_types=[".pdf"])
            upload_btn = gr.Button("Process PDF", variant="primary")
            # Read-only status line updated by upload_btn.click below.
            upload_status = gr.Textbox(label="Status", interactive=False, value="Status: Waiting for PDF...")
        
        with gr.Column(scale=2):
            # "messages" type: history is a list of {"role", "content"} dicts,
            # matching what PDFChatbot.chat returns.
            chatbot = gr.Chatbot(type="messages", label="Chat History", height=500)
            msg_textbox = gr.Textbox(label="Your Question:", interactive=True, placeholder="Type your question here...")
            # Clears the textbox and chat window; NOTE(review): does not reset
            # pdf_bot's stored PDF text — the document stays loaded.
            clear_btn = gr.ClearButton([msg_textbox, chatbot], value="Clear Chat")

    # Wire up the event listeners.
    upload_btn.click(
        fn=pdf_bot.upload_pdf,
        inputs=[pdf_file],
        outputs=[upload_status]
    )
    
    # Submit a question with the Enter key; chat() returns "" for the textbox
    # (clearing it) and the updated history for the chatbot component.
    msg_textbox.submit(
        fn=pdf_bot.chat,
        inputs=[msg_textbox, chatbot],
        outputs=[msg_textbox, chatbot]
    )

# Launch the app.
demo.launch(debug=True) # Use debug=True to see errors in the console.