
MiniCPM 3.0 Deployment

Last modified: September 26, 2024
👇
Audience: readers who can use basic bash and Python, including using Python to process data.
Machine requirements: a GPU with at least 8 GB of VRAM; additional VRAM is needed as the context length grows.
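To check whether a machine meets this requirement, GPU memory can be inspected with PyTorch. This is a minimal sketch, not part of the original guide; adjust the device index for multi-GPU machines.
Code block
import torch

# Report total and currently free VRAM on GPU 0.
assert torch.cuda.is_available(), "No CUDA-capable GPU detected"
total_bytes = torch.cuda.get_device_properties(0).total_memory
free_bytes, _ = torch.cuda.mem_get_info(0)
print(f"total VRAM: {total_bytes / 1024**3:.1f} GB, free VRAM: {free_bytes / 1024**3:.1f} GB")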
Transformers
Chat function
Code block
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("/root/ld/ld_model_pretrained/minicpm3", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("/root/ld/ld_model_pretrained/minicpm3", trust_remote_code=True).cuda()

history = []
query = input("user:")
response, history = model.chat(tokenizer, query=query, history=history)
print("model:", response)

query = input("user:")
response, history = model.chat(tokenizer, query=query, history=history)
print("model:", response)

# history is a list of turns, e.g.
# [{"role": "user", "content": query1}, {"role": "assistant", "content": response1}, ...]
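For interactive multi-turn use, the same chat interface can be wrapped in a simple loop. A minimal sketch that reuses the model and tokenizer loaded above:
Code block
# Minimal interactive loop on top of model.chat; history carries the previous turns.
history = []
while True:
    query = input("user (empty input to quit): ")
    if not query.strip():
        break
    response, history = model.chat(tokenizer, query=query, history=history)
    print("model:", response)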
Generate function
Code block
from transformers import AutoTokenizer, AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("/root/ld/ld_model_pretrained/minicpm3", trust_remote_code=True).cuda()
tokenizer = AutoTokenizer.from_pretrained("/root/ld/ld_model_pretrained/minicpm3", trust_remote_code=True)

prompt = "Hey, are you conscious? Can you tell me "
inputs = tokenizer(prompt, return_tensors="pt")

# Generate a raw completion of the prompt (no chat template applied)
generate_ids = model.generate(inputs.input_ids.cuda(), max_length=300, do_sample=False)
output = tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
print(output)
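For chat-style prompts with the generate API, the tokenizer's chat template can be applied first. A minimal sketch reusing the model and tokenizer loaded above; the example message is illustrative:
Code block
# Build a chat-formatted prompt with the chat template, then generate.
messages = [{"role": "user", "content": "Introduce MiniCPM 3.0 in one sentence."}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).cuda()
output_ids = model.generate(input_ids, max_new_tokens=256, do_sample=False)
# Decode only the newly generated tokens, not the prompt.
print(tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True))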
Function calling: a simple implementation
1. Each function declared in the tools list needs a Python function of the same name so that it can actually be called.
2. Function declarations in tools should strictly follow the format in the example below.
3. A tool implementation, such as the get_delivery_date function defined at the top of the code block, must use the same function name and parameter names as its declaration in tools and must run without errors. The concrete implementation is not important: the model only cares about the tool's inputs and return values, and the return values should be easy for the model to understand.
Code block
#!/usr/bin/env python
# encoding: utf-8
import re

from transformers import AutoTokenizer, AutoModelForCausalLM


def get_delivery_date(order_id=None):
    # Implementation of the function declared in tools
    if order_id is None:
        return "Unable to query without order number"
    else:
        print("get_delivery_date: This should be replaced with the actual query method, and the result should be returned using return")
        return "2024-09-02"


def get_response_call(tool_call_str):
    # Regular expression that extracts the function call from the model's ```python ... ``` block
    pattern = r'(?<=```python\n)(.*?)(?=\n```\n)'
    # Use the regular expression to match
    match = re.search(pattern, tool_call_str)
    if match:
        function_call = match.group(1)
        return function_call
    else:
        return None


tools = [
    {
        "type": "function",
        "function": {
            "name": "get_delivery_date",  # Function name; a Python function with the same name must be defined
            "description": "Get the delivery date for a customer's order. Call this whenever you need to know the delivery date, for example when a customer asks 'Where is my package'",
            "parameters": {
                "type": "object",
                "properties": {
                    "order_id": {  # Parameter name
                        "type": "string",  # Parameter type
                        "description": "The customer's order ID.",  # Parameter description
                    },
                },
                "required": ["order_id"],  # Which parameters are required
                "additionalProperties": False,
            },
        },
    }
]

messages = [
    {
        "role": "system",
        "content": "You are a helpful customer support assistant. Use the supplied tools to assist the user.",
    }
]

query = "Hi, can you tell me the delivery date for my order, my order id is 123456."

tokenizer = AutoTokenizer.from_pretrained(
    "/root/ld/ld_model_pretrained/minicpm3", trust_remote_code=True
)
prompt = tokenizer.apply_chat_template(
    messages, tools=tools, tokenize=False, add_generation_prompt=True
)
model = AutoModelForCausalLM.from_pretrained("/root/ld/ld_model_pretrained/minicpm3", trust_remote_code=True).cuda()

response, history = model.chat(tokenizer, query=query, history=messages, do_sample=False)  # For precise function calls, set do_sample to False
call_str = get_response_call(response)
print(eval(call_str))
# Output: 2024-09-02
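After running the extracted call, the tool's return value still has to be handed back to the model so it can phrase a final answer for the user. The sketch below is one illustrative way to do this with the same model.chat interface; the follow-up message layout is an assumption, not part of the original example.
Code block
# Sketch: report the executed tool's result back to the model for a final answer.
# The message layout here is illustrative; adjust it to the prompt format you actually use.
tool_result = eval(call_str)  # e.g. "2024-09-02"
followup_history = messages + [
    {"role": "user", "content": query},
    {"role": "assistant", "content": response},  # the model's tool-call message
]
final_answer, _ = model.chat(
    tokenizer,
    query=f"The tool get_delivery_date returned: {tool_result}. "
          "Please answer the user's original question based on this result.",
    history=followup_history,
    do_sample=False,
)
print(final_answer)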