Setup steps:
- Copy the local model: place the downloaded Baichuan2-7B-Chat folder under the models directory (a quick check follows below).
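If the folder is in place, the client added later in these steps will load it locally instead of downloading from ModelScope. A minimal sanity check, run from the project root (the path below assumes the default folder name):

```python
import os

# Quick sanity check: the loader below looks for models/<model_name>,
# so the folder name must match the model name exactly.
model_dir = os.path.join("models", "Baichuan2-7B-Chat")
print(("found: " if os.path.isdir(model_dir) else "missing: ") + model_dir)
```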
- Modify modules\models\base_model.py: add a Baichuan entry to class ModelType, and map model names containing "baichuan" to it (a context sketch follows below):

```python
# New member in class ModelType:
Baichuan = 16

# New branch in the model-name detection chain in the same file:
elif "baichuan" in model_name_lower:
    model_type = ModelType.Baichuan
```
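For orientation, the new elif lands inside ModelType's name-detection logic. The sketch below is illustrative only; the surrounding branches and the fallback are assumptions, not copied from the real base_model.py:

```python
# Illustrative sketch: only the "baichuan" branch comes from this setup,
# the rest is an assumed outline of the detection method.
@classmethod
def get_type(cls, model_name: str):
    model_type = None
    model_name_lower = model_name.lower()
    if "gpt" in model_name_lower:          # assumed existing branch
        model_type = ModelType.OpenAI
    elif "baichuan" in model_name_lower:   # the branch added in this step
        model_type = ModelType.Baichuan
    else:
        model_type = ModelType.Unknown     # assumed fallback
    return model_type
```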
- Modify modules\models\models.py: in the get_model function, add a branch for ModelType.Baichuan:

```python
elif model_type == ModelType.Baichuan:
    from .Baichuan import Baichuan_Client
    model = Baichuan_Client(model_name, user_name=user_name)
```
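Note that Baichuan_Client is imported inside the branch rather than at the top of models.py. This lazy import means torch, modelscope, and the other Baichuan dependencies are only pulled in when a Baichuan model is actually selected, so the remaining model backends keep working even if those packages are not installed.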
- Add a new file modules\models\Baichuan.py:

```python
from modelscope import snapshot_download
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation import GenerationConfig
import logging
import colorama
import os
from datetime import datetime

from ..index_func import *
from ..presets import *
from ..utils import *
from .base_model import BaseLLMModel

# Module-level cache so the model is only loaded once per process.
CHATGLM_TOKENIZER = None
CHATGLM_MODEL = None


class Baichuan_Client(BaseLLMModel):
    def __init__(self, model_name, user_name="") -> None:
        super().__init__(model_name=model_name, user=user_name)
        import torch

        global CHATGLM_TOKENIZER, CHATGLM_MODEL
        print("__init__ Baichuan_Client")
        if CHATGLM_TOKENIZER is None or CHATGLM_MODEL is None:
            # Prefer a local copy under models/; otherwise download from ModelScope.
            model_path = None
            if os.path.exists("models"):
                model_dirs = os.listdir("models")
                if model_name in model_dirs:
                    model_path = f"models/{model_name}"
            if model_path is not None:
                model_source = model_path
            else:
                model_source = snapshot_download(
                    f"baichuan-inc/{model_name}", revision="v1.0.4")
            CHATGLM_TOKENIZER = AutoTokenizer.from_pretrained(
                model_source, trust_remote_code=True)
            quantified = "int4" in model_name  # currently unused
            model = AutoModelForCausalLM.from_pretrained(
                model_source, device_map="auto",
                trust_remote_code=True, torch_dtype=torch.float16)
            model.generation_config = GenerationConfig.from_pretrained(model_source)
            model = model.eval()
            CHATGLM_MODEL = model

    def _get_glm_style_input(self):
        # The last history entry is the current user message; prior turns are
        # returned as history but only the query is sent to the model below.
        print("_get_glm_style_input")
        print(f"the history is: {self.history}")
        history = [x["content"] for x in self.history]
        query = history.pop()
        print(f"the message is: {query}")
        return history, query

    def get_answer_at_once(self):
        print("get_answer_at_once")
        history, query = self._get_glm_style_input()
        messages = [{"role": "user", "content": query}]
        now = datetime.now()
        print("get_answer_at_once start" + "++++++++" + now.strftime("%Y-%m-%d %H:%M:%S"))
        response = CHATGLM_MODEL.chat(CHATGLM_TOKENIZER, messages)
        now = datetime.now()
        print("get_answer_at_once end" + "++++++++" + now.strftime("%Y-%m-%d %H:%M:%S"))
        print(f"the response is: {response}")
        return response, len(response)

    def get_answer_stream_iter(self):
        history, query = self._get_glm_style_input()
        messages = [{"role": "user", "content": query}]
        result = ""
        now = datetime.now()
        print("get_answer_stream_iter start" + "++++++++" + now.strftime("%Y-%m-%d %H:%M:%S"))
        # chat() returns the finished answer as a string; iterating over it
        # yields one character at a time, giving the UI a pseudo-stream.
        for response in CHATGLM_MODEL.chat(CHATGLM_TOKENIZER, messages):
            result += response
            yield result
        now = datetime.now()
        print("get_answer_stream_iter end" + "++++++++" + now.strftime("%Y-%m-%d %H:%M:%S"))
```
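Before wiring the client into the UI, you can verify that the model loads and answers on its own. A minimal standalone sketch (it assumes the same models/ layout as above and falls back to ModelScope; the use_fast=False flag follows the official Baichuan2 usage example):

```python
import os
import torch
from modelscope import snapshot_download
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation import GenerationConfig

# Load from the local copy if present, otherwise fetch from ModelScope.
model_dir = "models/Baichuan2-7B-Chat"
if not os.path.isdir(model_dir):
    model_dir = snapshot_download("baichuan-inc/Baichuan2-7B-Chat", revision="v1.0.4")

tokenizer = AutoTokenizer.from_pretrained(model_dir, use_fast=False, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_dir, device_map="auto", torch_dtype=torch.float16, trust_remote_code=True)
model.generation_config = GenerationConfig.from_pretrained(model_dir)

messages = [{"role": "user", "content": "你好"}]
print(model.chat(tokenizer, messages))
```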
- The answer streaming switch controls which method is invoked: get_answer_at_once returns one complete reply, while get_answer_stream_iter yields incremental output (an illustrative dispatch sketch follows below).
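The dispatch itself lives in the base model's predict path. The sketch below is illustrative only; the real attribute and method names in base_model.py may differ:

```python
# Illustrative only: not the actual base_model.py source.
def predict(self, query, stream=False):
    if stream:
        # Incremental updates: each yielded value is the answer so far.
        for partial in self.get_answer_stream_iter():
            yield partial
    else:
        # One-shot: the full answer plus a length count.
        answer, length = self.get_answer_at_once()
        yield answer
```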
- Execution result