Spaces:

cody82
/

bot_innopolis

Sleeping

App Files Files Community

cody82 committed on Jul 21, 2025

Commit

9d9c29a

verified ·

1 Parent(s): c263659

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -23

app.py CHANGED Viewed

@@ -1,42 +1,43 @@
 import torch
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
-model_id = "google/flan-t5-base"  # можно flan-t5-large, если хватает памяти
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
-device = "cuda" if torch.cuda.is_available() else "cpu"
-model.to(device)
-context = """
-Университет Иннополис был основан в 2012 году. Это современный вуз в России,
-специализирующийся на IT и робототехнике, расположенный в городе Иннополис, Татарстан.
-"""
 def respond(message, history=None):
     if history is None:
         history = []
-    prompt = (
-        "Используя следующий контекст, ответь на вопрос четко и кратко.\n"
-        f"Контекст: {context}\n"
-        f"Вопрос: {message}\n"
-        "Ответ:"
-    )
-    inputs = tokenizer(prompt, return_tensors="pt").to(device)
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
-            max_new_tokens=100,
             do_sample=False,
-            eos_token_id=tokenizer.eos_token_id
         )
-    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
     history.append((message, answer))
     return history
-iface = gr.ChatInterface(fn=respond, title="Innopolis Q&A")
-iface.launch()

 import torch
 import gradio as gr
+from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
+model_path = "cody82/unitrip"  # путь к локальной модели
+config = AutoConfig.from_pretrained(model_path, local_files_only=True)
+tokenizer = AutoTokenizer.from_pretrained(model_path, local_files_only=True)
+model = AutoModelForCausalLM.from_pretrained(
+    model_path,
+    config=config,
+    local_files_only=True,
+    torch_dtype=torch.float32,
+    device_map="auto" if torch.cuda.is_available() else None,
+)
+system_message = "Ты — умный помощник по Университету Иннополис."
 def respond(message, history=None):
     if history is None:
         history = []
+    prompt = f"{system_message}\nUser: {message}\nAssistant:"
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
+            max_new_tokens=128,
             do_sample=False,
+            pad_token_id=tokenizer.eos_token_id,
+            eos_token_id=tokenizer.eos_token_id,
+            use_cache=True,
         )
+    generated_tokens = outputs[0][inputs["input_ids"].shape[1]:]
+    answer = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
     history.append((message, answer))
     return history
+chat = gr.ChatInterface(fn=respond, title="Innopolis Assistant")
+chat.launch()