Update app.py
app.py CHANGED
@@ -1,69 +1,77 @@
[Removed: 69 lines, only partially rendered by this view. The previous version kept the same three imports but downloaded and initialized the model at import time, blocking startup. The fragments that survive are the download call ending in

    filename="llama-2-7b.Q4_0.gguf"
    )

and the initialization arguments with their original comments:

    model_path=model_file_path,  # Path where the model is downloaded
    verbose=False,  # Suppress llama.cpp's own informational prints
    n_ctx=4096  # Set context window to match model's full capacity
    )

The rest of the removed lines, including the old chat logic, are not recoverable here.]
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import threading

# Title and description
TITLE = "AI Copilot for Diabetes Patients"
DESCRIPTION = "I provide answers to concerns related to Diabetes"

# Globals
llm_llama_cpp = None
model_ready = False
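A note on the readiness flag: a plain boolean works here because the single assignment is atomic under CPython's GIL, but threading.Event is the more idiomatic way to signal readiness across threads. A minimal sketch of that variant (the names mirror the globals above):

    import threading

    model_ready = threading.Event()  # replaces the boolean flag

    # inside load_model(), after Llama(...) succeeds:
    #     model_ready.set()

    # inside talk():
    #     if not model_ready.is_set():
    #         return "⏳ Please wait, the model is still loading..."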
# Download and initialize model in background
def load_model():
    global llm_llama_cpp, model_ready
    try:
        print("Downloading model...")
        model_file_path = hf_hub_download(
            repo_id="TheBloke/Llama-2-7B-GGUF",
            filename="llama-2-7b.Q4_0.gguf"
        )

        print("Initializing model...")
        llm_llama_cpp = Llama(
            model_path=model_file_path,
            verbose=False,
            n_ctx=4096
        )
        model_ready = True
        print("Model is ready.")
    except Exception as e:
        print(f"Failed to load model: {e}")

# Background thread for model loading
threading.Thread(target=load_model).start()
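Two small notes on the loader: hf_hub_download caches into the Hugging Face cache directory, so restarts reuse the already-downloaded multi-gigabyte model file rather than fetching it again; and marking the thread as a daemon lets the process exit cleanly even if the download is still in flight:

    threading.Thread(target=load_model, daemon=True).start()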
# Chatbot logic
def talk(prompt, history):
    if not model_ready:
        return "⏳ Please wait, the model is still loading..."

    try:
        response = ""
        response_stream = llm_llama_cpp.create_completion(
            prompt=prompt,
            max_tokens=200,
            stream=True
        )

        for chunk in response_stream:
            if 'choices' in chunk and 'text' in chunk['choices'][0]:
                response += chunk['choices'][0]['text']
        return response

    except Exception as e:
        print(f"Error in generating response: {e}")
        return f"Error during response generation: {e}"
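Since the completion is already requested with stream=True, the tokens could be shown as they arrive instead of being buffered into one string: gr.ChatInterface accepts generator functions, and yielding the growing string streams partial output to the chat UI. A minimal sketch under that assumption (talk_streaming is a hypothetical name; it reuses the globals above):

    def talk_streaming(prompt, history):
        # Yield the accumulated text so the UI updates as tokens arrive.
        if not model_ready:
            yield "⏳ Please wait, the model is still loading..."
            return
        response = ""
        for chunk in llm_llama_cpp.create_completion(prompt=prompt, max_tokens=200, stream=True):
            response += chunk["choices"][0].get("text", "")
            yield response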
# Gradio interface
demo = gr.ChatInterface(
    fn=talk,
    chatbot=gr.Chatbot(
        show_label=True,
        show_share_button=True,
        show_copy_button=True,
        layout="bubble",
        type="messages",
    ),
    theme="soft",  # built-in Gradio theme names are lowercase; "Soft" would not resolve
    examples=[["what is Diabetes?"]],
    title=TITLE,
    description=DESCRIPTION,
)

# Launch the UI
demo.launch()
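One launch-time note: if streamed responses only appear after generation finishes, enabling Gradio's request queue before launching is the usual fix; depending on the Gradio version it may already be on by default:

    demo.queue().launch()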