【c4ai-command-r-plus、c4ai-command-r-v01】最强RAG模型中文流式输出多轮对话代码案例

模型概要

C4AI Command R+ 是一个拥有 1040 亿（104B）参数的模型的开放权重研究版本，具备高度先进的能力，其中包括检索增强生成 (RAG) 以及用于自动完成复杂任务的工具使用能力。该模型的工具使用支持多步骤调用，即模型可以在多个步骤中组合多个工具来完成困难的任务。C4AI Command R+ 是一个多语言模型，已在 10 种语言上进行了性能评估：英语、法语、西班牙语、意大利语、德语、巴西葡萄牙语、日语、韩语、阿拉伯语和简体中文。Command R+ 针对推理、总结和问答等多种用例进行了优化。

C4AI Command R+ 是 Cohere For AI 与 Cohere 发布的开放权重模型系列的一部分。与之配套的较小模型是 C4AI Command R。

非流式推理代码

# Non-streaming inference: load the model, format one user turn with the
# command-r-plus chat template, sample a reply, and print the decoded text.
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "CohereForAI/c4ai-command-r-plus"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# Format message with the command-r-plus chat template
messages = [{"role": "user", "content": "Hello, how are you?"}]
input_ids = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt",
)
## <BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, how are you?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>

# Keep the prompt on the same device as the model so generate() does not
# hit a device mismatch (a no-op when the model lives on CPU).
input_ids = input_ids.to(model.device)

gen_tokens = model.generate(
    input_ids,
    max_new_tokens=100,
    do_sample=True,
    temperature=0.3,
)

# gen_tokens[0] holds the prompt followed by the sampled continuation.
gen_text = tokenizer.decode(gen_tokens[0])
print(gen_text)

非流式8bit量化推理代码

# Non-streaming inference with 8-bit quantized weights (bitsandbytes).
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(load_in_8bit=True)

model_id = "CohereForAI/c4ai-command-r-plus"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config)

# Format message with the command-r-plus chat template
messages = [{"role": "user", "content": "Hello, how are you?"}]
input_ids = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt",
)
## <BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, how are you?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>

# Quantized weights are placed on an accelerator, so the prompt tensor must
# follow the model's device instead of staying on CPU.
input_ids = input_ids.to(model.device)

gen_tokens = model.generate(
    input_ids,
    max_new_tokens=100,
    do_sample=True,
    temperature=0.3,
)

# gen_tokens[0] holds the prompt followed by the sampled continuation.
gen_text = tokenizer.decode(gen_tokens[0])
print(gen_text)

流式推理代码

import os
import sys
import torch
import platform
from threading import Thread
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import TextIteratorStreamer, GenerationConfig

# Select which GPUs are visible to this process. This has to be set before
# the first CUDA call for it to take effect.
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3'

# Shell command used to wipe the terminal when the conversation is reset.
os_name = platform.system()
clear_command = 'cls' if os_name == 'Windows' else 'clear'

# User-facing banner (kept verbatim; it is shown at runtime).
welcome_prompt = "欢迎使用 c4ai-command-r-plus 模型，输入内容即可进行对话，clear 清空对话历史，exit 终止程序"

model_id = "CohereForAI/c4ai-command-r-plus"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# device_map="auto" lets the loader decide where the weights are placed.
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")


def streamer_generate(
    messages,
    max_new_tokens=512,
    temperature=0.1,
    top_p=0.8,
    top_k=3,
    num_beams=1,
    **kwargs,
):
    """Generate a reply for *messages*, echoing it to stdout as it streams.

    Args:
        messages: Chat history as a list of ``{"role", "content"}`` dicts,
            ending with the user turn to answer. The list is trimmed in
            place once it grows beyond 10 entries.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.
        top_k: Number of highest-probability tokens kept for sampling.
        num_beams: Beam count forwarded to ``generate``.
        **kwargs: Extra keyword arguments forwarded to ``model.generate``.

    Returns:
        The generated reply text, without the prompt or special tokens.
    """
    # Follow the model's own device rather than hard-coding 'cuda'; with
    # device_map="auto" the input embeddings decide where inputs must live.
    input_ids = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)

    # skip_prompt drops the echoed prompt; skip_special_tokens is forwarded
    # to tokenizer.decode so turn markers never reach the output. This
    # replaces string-matching the prompt and stripping the end marker with
    # str.rstrip, which removes a *character set* and could eat real
    # trailing characters of the reply.
    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,
        skip_special_tokens=True,
    )

    # Sampling options go straight to generate() so they are merged into the
    # model's default generation config (keeping its EOS/pad settings), and
    # **kwargs is applied exactly once.
    generation_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        num_beams=num_beams,
        **kwargs,
    )

    # generate() blocks, so it runs in a worker thread while this thread
    # consumes the streamer.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    pieces = []
    for new_text in streamer:
        print(new_text, end='', flush=True)
        pieces.append(new_text)
    thread.join()
    print("")
    generated_text = "".join(pieces)

    # Cap the multi-turn history: drop the oldest exchange after the two
    # seed messages at the front of the list.
    if len(messages) > 10:
        del messages[2:4]
    return generated_text
def _seed_messages():
    """Return a fresh copy of the two-message seed conversation."""
    # NOTE(review): the assistant turns use role "chatbot"; confirm that the
    # tokenizer's chat template recognises this role (it may expect
    # "assistant"), otherwise these turns might not be rendered.
    return [
        {"role": "user", "content": "请使用中文进行对话"},
        {"role": "chatbot", "content": "好的，我可以尝试用中文与您交流。请问有什么可以帮助您吗"},
    ]


if __name__ == "__main__":
    # Interactive loop: "exit" quits, "clear" resets the history, anything
    # else is sent to the model as the next user turn.
    messages = _seed_messages()
    print(welcome_prompt)
    while True:
        print("-"*100)
        query = input("user:").strip()
        if not query:
            # Nothing to send; prompt again.
            continue
        if query == "exit":
            print("Task is over.")
            sys.exit()
        if query == "clear":
            messages = _seed_messages()
            os.system(clear_command)
            print(welcome_prompt)
            continue
        messages.append({"role": "user", "content": query})
        generated_text = streamer_generate(messages)
        # Append the reply as a message dict; wrapping it in a list would
        # corrupt the history handed to the chat template on the next turn.
        messages.append({"role": "chatbot", "content": generated_text})