"""Run a single chat-style generation with the clean_llm Qwen2.5 model.

The script picks a device (CUDA, then MPS, then CPU), loads the model and
tokenizer from ``model_path``, formats one system + user exchange with the
tokenizer's chat template, generates up to 50 new tokens, and prints the
prompt followed by the decoded response.
"""
import torch
import warnings

# Kept ahead of the remaining imports, as in the original statement order, so
# the filter is already installed when those modules are imported.
warnings.filterwarnings("ignore")

from clean_llm.models.qwen2_5 import Qwen2_5
from transformers import AutoTokenizer

# Device preference: CUDA first, then Apple MPS, otherwise CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

model_path = "huggingface_models/Qwen/Qwen2.5-0.5B-Instruct"
model = Qwen2_5.from_pretrained(model_path).to(device)
tokenizer = AutoTokenizer.from_pretrained(model_path)
print(f"[INFO] Load {model_path.split('/')[-1]} model on device {device}")

prompt = "Give me a short introduction to large language model."
messages = [
    {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
    {"role": "user", "content": prompt},
]

# Render the conversation to plain text (tokenize=False) and append the
# generation prompt so the model continues as the assistant.
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
input_ids = model_inputs["input_ids"]

# Generation is inference-only, so do not record an autograd graph. This is a
# no-op if the project's generate() already disables gradients itself.
with torch.no_grad():
    generated_idx = model.generate(
        input_ids,
        max_new_tokens=50,
        eos_token_id=tokenizer.eos_token_id,
    )

# The returned sequence is sliced at the prompt length so that only the
# tokens after the prompt are decoded.
prompt_length = input_ids.shape[1]
response_ids = generated_idx[0][prompt_length:]
response = tokenizer.decode(response_ids, skip_special_tokens=True)

print("Prompt:")
print(prompt)
print("Response:")
print(response)