admin08077 committed
Commit cc25a12 · verified · 1 Parent(s): 325e85e

Update app.py

Files changed (1):
  1. app.py +5 -58
app.py CHANGED
@@ -3,29 +3,17 @@ from huggingface_hub import InferenceClient
 import nltk
 import PyPDF2
 
+# Download required NLTK resources
 nltk.download("punkt", quiet=True)
+nltk.download("punkt_tab", quiet=True)
 
-###############################################################################
-#                          Hugging Face Chat Code                             #
-###############################################################################
-
-# Initialize the Hugging Face model client
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
 def respond_chunked(message, history, system_message, max_tokens, temperature, top_p, file_content):
-    """
-    Calls the Hugging Face model for a response with support for chunked file content.
-    """
-    # Split file content into manageable chunks
     chunks = chunk_text(file_content, max_chunk_size=1500)
     combined_response = ""
-
-    # Process each chunk and append to the response
     for chunk in chunks:
-        # Append chunk to system message for context
         chunked_system_message = f"{system_message}\n\nFile Content Chunk:\n{chunk}"
-
-        # Prepare the message payload
         messages = [{"role": "system", "content": chunked_system_message}]
         for user, assistant in history:
             if user:
@@ -33,7 +21,6 @@ def respond_chunked(message, history, system_message, max_tokens, temperature, t
         if assistant:
             messages.append({"role": "assistant", "content": assistant})
         messages.append({"role": "user", "content": message})
-
         try:
             completion = client.chat_completion(
                 messages,
@@ -44,18 +31,9 @@ def respond_chunked(message, history, system_message, max_tokens, temperature, t
             combined_response += completion.choices[0].message["content"] + "\n"
         except Exception as e:
             combined_response += f"Error processing chunk: {e}\n"
-
     return combined_response.strip()
 
-###############################################################################
-#                       File Upload & Parsing Functions                       #
-###############################################################################
-
 def parse_file(file_obj):
-    """
-    Parses uploaded files and extracts content.
-    Supports PDFs and plain text.
-    """
     file_extension = file_obj.name.split('.')[-1].lower()
     if file_extension == "pdf":
         try:
@@ -70,9 +48,6 @@ def parse_file(file_obj):
         return f"Error reading file: {e}"
 
 def load_files(files):
-    """
-    Loads multiple files, parses their content, and concatenates the text.
-    """
     combined_text = ""
     for file in files:
         try:
@@ -83,23 +58,14 @@ def load_files(files):
             combined_text += f"Error processing file {file}: {e}\n"
     return combined_text
 
-###############################################################################
-#                              Chunking Function                              #
-###############################################################################
-
 def chunk_text(text, max_chunk_size=1500):
-    """
-    Splits text into chunks of up to `max_chunk_size` tokens (approximate).
-    """
     from nltk.tokenize import sent_tokenize
-
     sentences = sent_tokenize(text)
     chunks = []
     current_chunk = ""
     current_tokens = 0
 
     def approximate_token_count(text):
-        # Naive tokenization approximation
         return len(text.split())
 
     for sentence in sentences:
@@ -117,50 +83,32 @@ def chunk_text(text, max_chunk_size=1500):
 
     return chunks
 
-###############################################################################
-#                              Gradio UI Layout                               #
-###############################################################################
-
 with gr.Blocks() as demo:
     gr.Markdown("# **Chat with File Context (Chunking for Large Files)**")
-    gr.Markdown(
-        """
-        This app lets you upload large file(s) and chat with an AI assistant.
-        Uploaded file content will be processed in chunks to ensure smooth handling.
-        """
-    )
-
-    # States to store file content and chat history
+    gr.Markdown("Upload large files, and chat with AI using context derived from those files.")
+
     file_content_state = gr.State("")
     chat_history_state = gr.State([])
 
-    # File Upload Section
     file_input = gr.File(label="Upload File(s)", file_count="multiple", type="filepath")
     file_input.change(fn=load_files, inputs=file_input, outputs=file_content_state)
 
-    # Chat Section
-    gr.Markdown("## Chat")
-    chatbot = gr.Chatbot(label="Conversation")
+    chatbot = gr.Chatbot(label="Conversation", type="messages")
     user_input = gr.Textbox(label="Your Message", placeholder="Ask something...", lines=2)
 
-    # Model Configuration Sliders
     system_prompt = gr.Textbox(label="System Prompt", value="You are a helpful AI assistant.", interactive=True)
     max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens")
    temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
     top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p")
 
-    # Chat Function with Chunking
     def chat_function(user_message, history, file_content, system_prompt, max_tokens, temperature, top_p):
         if not user_message.strip():
             return "", history
-        # Append user's message to the chat history
         history.append((user_message, ""))
-        # Get response from the model with chunking
         assistant_response = respond_chunked(user_message, history, system_prompt, max_tokens, temperature, top_p, file_content)
         history[-1] = (user_message, assistant_response)
         return "", history
 
-    # Add a Send Button for manual submission
     send_button = gr.Button("Send")
    send_button.click(
        fn=chat_function,
@@ -168,7 +116,6 @@ Uploaded file content will be processed in chunks to ensure smooth handling.
         outputs=[user_input, chatbot]
     )
 
-    # Submit Chat Input with Enter Key as well
     user_input.submit(
         fn=chat_function,
         inputs=[user_input, chat_history_state, file_content_state, system_prompt, max_tokens, temperature, top_p],
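
A note on the added punkt_tab line: newer NLTK releases (around 3.9) moved the Punkt sentence-tokenizer tables into a separate "punkt_tab" resource, and sent_tokenize raises a LookupError when only "punkt" is installed. A minimal sketch of what the added download guards against (the sample sentence is illustrative):

# Assumes a recent NLTK (~3.9+), where sent_tokenize loads "punkt_tab";
# on older releases "punkt" alone was sufficient.
import nltk
nltk.download("punkt", quiet=True)
nltk.download("punkt_tab", quiet=True)  # without this, newer NLTK raises LookupError

from nltk.tokenize import sent_tokenize
print(sent_tokenize("First sentence. Second one."))
# ['First sentence.', 'Second one.']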
 
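The chunking itself is unchanged by this commit: approximate_token_count splits on whitespace, so max_chunk_size=1500 bounds words rather than true model tokens, and respond_chunked issues one chat_completion call per chunk. A rough, hypothetical sanity check of the helper (not part of the commit; exact chunk counts depend on the packing loop, which the diff does not show in full):

# Rough check of chunk_text from app.py; the bound is on
# whitespace-separated words, not model tokens.
text = " ".join(f"This is sentence number {i}." for i in range(1000))  # ~5000 words
chunks = chunk_text(text, max_chunk_size=1500)
print(len(chunks))                                   # expect around 4 chunks
print(all(len(c.split()) <= 1500 for c in chunks))   # should hold unless one sentence exceeds the cap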
 
 
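One caveat worth flagging with the new type="messages": Gradio then expects the Chatbot history as a list of {"role": ..., "content": ...} dicts, while chat_function and respond_chunked still build (user, assistant) tuples, so the UI may warn about, or mis-render, the legacy tuple format. A hypothetical messages-style wrapper (the name chat_function_messages and the re-pairing logic are illustrative, not part of the commit):

# Hypothetical adaptation, not in the commit: converts dict-style history
# into the (user, assistant) tuples respond_chunked iterates over, then
# appends the new exchange in the dict format gr.Chatbot(type="messages") expects.
def chat_function_messages(user_message, history, file_content,
                           system_prompt, max_tokens, temperature, top_p):
    if not user_message.strip():
        return "", history
    # Re-pair consecutive user/assistant dicts into tuples.
    pairs, pending_user = [], None
    for msg in history:
        if msg["role"] == "user":
            pending_user = msg["content"]
        elif msg["role"] == "assistant" and pending_user is not None:
            pairs.append((pending_user, msg["content"]))
            pending_user = None
    reply = respond_chunked(user_message, pairs, system_prompt,
                            max_tokens, temperature, top_p, file_content)
    return "", history + [
        {"role": "user", "content": user_message},
        {"role": "assistant", "content": reply},
    ]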