Spaces:

Muhammadidrees
/

MoizChatDoctor

Paused

App Files Files Community

Muhammadidrees committed on Oct 15, 2025

Commit

2856d25

verified ·

1 Parent(s): 52e77eb

Delete app.py

Browse files

Files changed (1) hide show

app.py +0 -97

app.py DELETED Viewed

@@ -1,97 +0,0 @@
-import os, json, itertools, bisect, gc
-from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
-import transformers
-import torch
-from accelerate import Accelerator
-import accelerate
-import time
-import gradio as gr
-model = None  # LlamaForCausalLM instance; assigned by load_model()
-tokenizer = None  # LlamaTokenizer instance; assigned by load_model()
-generator = None  # bound model.generate method; assigned by load_model()
-def load_model(model_name = "zl111/ChatDoctor", eight_bit=0, device_map="auto"):
-    global model, tokenizer, generator
-    print("Loading "+model_name+"...")
-    if device_map == "zero":
-        device_map = "balanced_low_0"
-    # config
-    gpu_count = torch.cuda.device_count()
-    print('gpu_count', gpu_count)
-    tokenizer = transformers.LlamaTokenizer.from_pretrained(model_name)
-    model = transformers.LlamaForCausalLM.from_pretrained(
-        model_name,
-        #device_map=device_map,
-        #device_map="auto",
-        torch_dtype=torch.float16,
-        #max_memory = {0: "14GB", 1: "14GB", 2: "14GB", 3: "14GB",4: "14GB",5: "14GB",6: "14GB",7: "14GB"},
-        #load_in_8bit=eight_bit,
-        low_cpu_mem_usage=True,
-        load_in_8bit=False,
-        cache_dir="cache"
-    ).cuda()
-    generator = model.generate
-load_model()  # runs at import time with the default arguments; fills model/tokenizer/generator
-history = []  # conversation turns ("Human: ..." / "Assistant: ...") appended by go(); never trimmed
-def go():
-    invitation = "Assistant: "
-    human_invitation = "Human: "
-    # input
-    msg = input(human_invitation)
-    print("")
-    history.append(human_invitation + msg)
-    fulltext = "\n\n".join(history) + "\n\n" + invitation
-#    print('SENDING==========')
-#    print(fulltext)
-#    print('==========')
-    generated_text = ""
-    gen_in = tokenizer(fulltext, return_tensors="pt").input_ids.cuda()
-    in_tokens = len(gen_in)
-    with torch.no_grad():
-            generated_ids = generator(
-                gen_in,
-                max_new_tokens=200,
-                use_cache=True,
-                pad_token_id=tokenizer.eos_token_id,
-                num_return_sequences=1,
-                do_sample=True,
-                repetition_penalty=1.1, # 1.0 means 'off'. unfortunately if we penalize it it will not output Sphynx:
-                temperature=0.5, # default: 1.0
-                top_k = 50, # default: 50
-                top_p = 1.0, # default: 1.0
-                early_stopping=True,
-            )
-            generated_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0] # for some reason, batch_decode returns an array of one element?
-            text_without_prompt = generated_text[len(fulltext):]
-    response = text_without_prompt
-    response = response.split(human_invitation)[0]
-    response.strip()
-    print(invitation + response)
-    print("")
-    history.append(invitation + response)
-while True:  # chat loop with no exit condition: ends only when an exception (e.g. Ctrl-C or EOF in input()) propagates
-    go()