VGreatVig07 committed
Commit 903d923 · verified · 1 Parent(s): a939ef3

Update app.py

Files changed (1)
  1. app.py +3 -18
app.py CHANGED
@@ -19,25 +19,10 @@ import torch
 
 os.environ["STREAMLIT_WATCHER_TYPE"] = "none"
 
-class LlamaCppLLMWrapper:
-    def __init__(self, model_path, temperature=0.2, top_p=0.95, n_ctx=2048, n_threads=8, n_gpu_layers=20):
-        self.llm = Llama(
-            model_path=model_path,
-            n_ctx=n_ctx,
-            n_threads=n_threads,
-            n_gpu_layers=n_gpu_layers,
-            temperature=temperature,
-            top_p=top_p,
-        )
-
-    def invoke(self, prompt, max_tokens=300):
-        response = self.llm(prompt, max_tokens=max_tokens)
-        return response["choices"][0]["text"].strip()
-
-# 🧠 Cache LLaMA model
 @st.cache_resource
 def load_llm():
-    return LlamaCppLLMWrapper(model_path="Models/phi3-finetuned.gguf")
+    client = InferenceClient(model="microsoft/phi-3-mini-4k-instruct")
+    return client
 
 # 🧠 Cache embedder
 @st.cache_resource
@@ -105,7 +90,7 @@ Question:
 
 Answer:"""
 
-answer = llm.invoke(prompt)
+answer = llm.text_generation(prompt, max_new_tokens=200)
 
 # Save chat history
 st.session_state.chat_history.append(("user", user_input))
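
In short, the commit drops the local llama.cpp wrapper and its GGUF model in favor of a hosted Hugging Face InferenceClient. Below is a minimal sketch of how the new path fits together, assuming InferenceClient is imported from huggingface_hub near the top of app.py (the hunks above do not show that import) and that microsoft/phi-3-mini-4k-instruct is reachable through the hosted Inference API, which may require an HF token.

import streamlit as st
from huggingface_hub import InferenceClient  # assumed import; not shown in this diff

@st.cache_resource
def load_llm():
    # Cache one remote client across Streamlit reruns instead of loading a local GGUF model.
    return InferenceClient(model="microsoft/phi-3-mini-4k-instruct")

llm = load_llm()
prompt = "Question: What does @st.cache_resource do?\n\nAnswer:"
# text_generation() sends the prompt to the model's hosted text-generation endpoint.
answer = llm.text_generation(prompt, max_new_tokens=200)
st.write(answer)

With this shape, only the lightweight client object is cached; each question still triggers a network call to the inference endpoint, so the app no longer needs local GPU layers, thread tuning, or the Models/phi3-finetuned.gguf file.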