admin08077 committed
Commit 630bdac · verified · 1 Parent(s): 5173b34

Update app.py

Files changed (1): app.py +117 -99
app.py CHANGED
@@ -1,66 +1,75 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
+
 import nltk
+import json
+import io
+from fpdf import FPDF
+from textblob import TextBlob
 import PyPDF2
+import tempfile

-# Download the necessary NLTK data (using the correct resource "punkt")
+# Download NLTK punkt tokenizer if needed.
 nltk.download("punkt", quiet=True)

-# Initialize the Hugging Face Inference Client
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
+###############################################################################
+#                           Hugging Face Chat Code                            #
+###############################################################################
+"""
+For more information on Hugging Face Inference API support, please check:
+https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
+"""

-# Function to split text into manageable chunks
-def chunk_text(text, max_chunk_size=1500):
-    from nltk.tokenize import sent_tokenize
-    sentences = sent_tokenize(text)
-    chunks = []
-    current_chunk = ""
-    current_tokens = 0
+# Initialize the Hugging Face model client (make sure you have access)
+client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

-    for sentence in sentences:
-        sentence_tokens = len(sentence.split())
-        if current_tokens + sentence_tokens <= max_chunk_size:
-            current_chunk += " " + sentence
-            current_tokens += sentence_tokens
-        else:
-            if current_chunk:
-                chunks.append(current_chunk.strip())
-            current_chunk = sentence
-            current_tokens = sentence_tokens
-    if current_chunk:
-        chunks.append(current_chunk.strip())
-    return chunks
+def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p, file_content):
+    """
+    Calls the model (in non-streaming mode) to get a complete response.
+    If file_content is non-empty, it is appended to the system message (context).
+    """
+    if file_content and file_content.strip():
+        system_message += "\n\nFile content:\n" + file_content

-# Function to provide responses for each text chunk
-def respond_chunked(message, history, system_message, max_tokens, temperature, top_p, file_content):
-    if not file_content.strip():
-        return "No file content available to provide context."
+    # Build messages list in the expected format.
+    messages = [{"role": "system", "content": system_message}]
+    for user_msg, assistant_msg in history:
+        if user_msg:
+            messages.append({"role": "user", "content": user_msg})
+        if assistant_msg:
+            messages.append({"role": "assistant", "content": assistant_msg})
+    messages.append({"role": "user", "content": message})

-    chunks = chunk_text(file_content, max_chunk_size=1500)
-    combined_response = ""
-    for chunk in chunks:
-        chunked_system_message = f"{system_message}\n\nFile Content Chunk:\n{chunk}"
-        messages = [{"role": "system", "content": chunked_system_message}] + history
-        messages.append({"role": "user", "content": message})
-        try:
-            completion = client.chat_completion(
-                messages=messages,
-                max_tokens=max_tokens,
-                temperature=temperature,
-                top_p=top_p,
-            )
-            combined_response += completion.choices[0].message["content"] + "\n"
-        except Exception as e:
-            combined_response += f"Error processing chunk: {e}\n"
-    return combined_response.strip()
+    try:
+        completion = client.chat_completion(
+            messages,
+            max_tokens=max_tokens,
+            stream=False,  # Non-streaming mode for simplicity
+            temperature=temperature,
+            top_p=top_p,
+        )
+        response = completion.choices[0].message["content"]
+    except Exception as e:
+        response = f"Error during model response: {e}"
+    return response
+
+###############################################################################
+#                       File Upload & Parsing Functions                       #
+###############################################################################

-# Function to parse the uploaded file based on its extension
 def parse_file(file_obj):
+    """
+    Parses an uploaded file.
+    Supports PDF (using PyPDF2) and text files (UTF-8 decoding).
+    """
     file_extension = file_obj.name.split('.')[-1].lower()
     if file_extension == "pdf":
         try:
             reader = PyPDF2.PdfReader(file_obj)
-            return "\n".join(page.extract_text() or "" for page in reader.pages)
+            text = ""
+            for page in reader.pages:
+                text += (page.extract_text() or "") + "\n"
+            return text
         except Exception as e:
             return f"Error reading PDF: {e}"
     else:
@@ -69,67 +78,76 @@ def parse_file(file_obj):
         except Exception as e:
             return f"Error reading file: {e}"

-# Define the Gradio app interface
-with gr.Blocks() as demo:
-    gr.Markdown("# **Chat with File Context (Chunking for Large Files)**")
-    gr.Markdown("Upload large files, and chat with AI using context derived from those files.")
+def load_files(files):
+    """
+    Processes a list of uploaded files (provided as file paths).
+    Opens each file, parses its content, and concatenates the text.
+    """
+    all_text = ""
+    for file_path in files:
+        try:
+            with open(file_path, "rb") as f:
+                content = parse_file(f)
+                all_text += content + "\n"
+        except Exception as e:
+            all_text += f"Error processing file {file_path}: {e}\n"
+    return all_text

-    # States to store file content and chat history
+###############################################################################
+#                              Gradio UI Layout                               #
+###############################################################################
+
+with gr.Blocks() as demo:
+    gr.Markdown("# Combined Chat & File Upload App")
+    gr.Markdown(
+        """
+        This app allows you to upload file(s) (PDF or TXT) to provide context for the AI.
+        Once files are uploaded, their contents are automatically parsed and used in every conversation.
+        Simply upload a file and then start chatting.
+        """
+    )
+
+    # State to hold file content (the concatenated text of uploaded files)
     file_content_state = gr.State("")
+    # State to hold the conversation history (list of (user, assistant) tuples)
     chat_history_state = gr.State([])

-    # File upload component (accepts multiple files)
+    # --- File Upload ---
+    # Using type="filepath" so that we get a file path that can be opened later.
     file_input = gr.File(label="Upload File(s)", file_count="multiple", type="filepath")
+    # Automatically process files when they are uploaded.
+    file_input.change(fn=load_files, inputs=file_input, outputs=file_content_state)

-    def handle_file_upload(files):
-        """
-        Process uploaded files and store their content.
-        """
-        combined_text = ""
-        for file in files:
-            try:
-                with open(file, "rb") as f:
-                    content = parse_file(f)
-                    combined_text += content + "\n"
-            except Exception as e:
-                combined_text += f"Error processing file {file}: {e}\n"
-        return combined_text.strip()
-
-    file_input.change(fn=handle_file_upload, inputs=file_input, outputs=file_content_state)
-
-    # Chat interface components
-    chatbot = gr.Chatbot(label="Conversation", type="messages")
-    user_input = gr.Textbox(label="Your Message", placeholder="Ask something...", lines=2)
-    system_prompt = gr.Textbox(label="System Prompt", value="You are a helpful AI assistant.", interactive=True)
-    max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens")
-    temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
-    top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p")
-
-    def chat_function(user_message, history, file_content, system_prompt, max_tokens, temperature, top_p):
-        if not user_message.strip():
+    gr.Markdown("## Chat")
+    chatbot = gr.Chatbot(label="Chat History")
+    user_input = gr.Textbox(label="Your Message", placeholder="Type your message here...", lines=2)
+
+    # Additional model parameters:
+    system_prompt = gr.Textbox(label="System Message", value="You are a helpful AI assistant.", interactive=True)
+    max_tokens_slider = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max New Tokens")
+    temperature_slider = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
+    top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
+
+    def chat_fn(user_msg, history, file_content, system_msg, max_tokens, temperature, top_p):
+        if not user_msg.strip():
             return "", history
-        # Get the assistant's response using the chunking function
-        assistant_response = respond_chunked(
-            user_message, history, system_prompt, max_tokens, temperature, top_p, file_content
-        )
-        # Append user and assistant messages to the conversation history
-        history.append({"role": "user", "content": user_message})
-        history.append({"role": "assistant", "content": assistant_response})
+        # Append the user's message to the conversation history.
+        history.append((user_msg, ""))
+        # Call the respond function (non-streaming) to get a complete answer.
+        response = respond(user_msg, history, system_msg, max_tokens, temperature, top_p, file_content)
+        # Update the last entry in the conversation with the response.
+        history[-1] = (user_msg, response)
         return "", history
-
-    # Button to send the user message
-    send_button = gr.Button("Send")
-    send_button.click(
-        fn=chat_function,
-        inputs=[user_input, chat_history_state, file_content_state, system_prompt, max_tokens, temperature, top_p],
-        outputs=[user_input, chatbot]
-    )
-
-    # Enable submission via the Enter key in the textbox
+
+    # When user submits a message, call chat_fn.
     user_input.submit(
-        fn=chat_function,
-        inputs=[user_input, chat_history_state, file_content_state, system_prompt, max_tokens, temperature, top_p],
-        outputs=[user_input, chatbot]
+        fn=chat_fn,
+        inputs=[user_input, chat_history_state, file_content_state, system_prompt, max_tokens_slider, temperature_slider, top_p_slider],
+        outputs=[user_input, chatbot],
+        queue=True
     )
+
+demo.launch(server_name="0.0.0.0", server_port=7860)

-demo.launch(server_name="0.0.0.0", server_port=7860, share=True, show_error=True)
+if __name__ == "__main__":
+    demo.launch()
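
For readers trying the new code path outside the app, here is a minimal sketch of the non-streaming chat call that respond() wraps. It assumes huggingface_hub is installed, a valid Hugging Face token is available to the client (for example via the HF_TOKEN environment variable), and HuggingFaceH4/zephyr-7b-beta is still served by the Inference API:

    # Standalone sketch of the chat_completion call made by respond().
    # Assumptions: huggingface_hub installed, HF token configured, model served.
    from huggingface_hub import InferenceClient

    client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

    messages = [
        {"role": "system", "content": "You are a helpful AI assistant."},
        {"role": "user", "content": "In one sentence, what does PyPDF2 do?"},
    ]

    completion = client.chat_completion(
        messages,
        max_tokens=128,
        stream=False,   # non-streaming, as in respond()
        temperature=0.7,
        top_p=0.95,
    )
    # app.py reads the message with dict-style access; recent huggingface_hub
    # versions also expose it as an attribute (completion.choices[0].message.content).
    print(completion.choices[0].message["content"])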
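
One structural detail worth noting: chat_fn keeps the conversation as (user, assistant) tuples, which is what the default gr.Chatbot component renders, and respond() unpacks those tuples into the role/content dicts that chat_completion expects. A small illustration of that conversion, mirroring the loop in respond() (the sample history values are invented):

    # History as chat_fn stores it: (user, assistant) tuples; the trailing
    # empty string is the placeholder chat_fn appends before calling respond().
    history = [("Hi", "Hello! How can I help?"), ("Summarize the file", "")]

    # The same unpacking respond() performs before calling the model:
    messages = [{"role": "system", "content": "You are a helpful AI assistant."}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:  # empty placeholders are skipped
            messages.append({"role": "assistant", "content": assistant_msg})
    print(messages)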
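
Finally, a quick hypothetical smoke test for parse_file(). The NamedBytesIO helper is invented for the test, because parse_file branches on the uploaded object's name attribute and a plain io.BytesIO cannot carry one; the expected output assumes the elided text branch decodes the bytes as UTF-8, as the new docstring states:

    import io

    class NamedBytesIO(io.BytesIO):
        """Hypothetical helper: a BytesIO carrying a .name, like an uploaded file."""
        def __init__(self, data: bytes, name: str):
            super().__init__(data)
            self.name = name

    # A ".txt" name should route the buffer through the UTF-8 text branch.
    fake_upload = NamedBytesIO("Hello from a plain-text upload.".encode("utf-8"), "notes.txt")
    print(parse_file(fake_upload))  # expected: Hello from a plain-text upload.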