Generate Text with Phixtral Inference



Code to run mlabonne/phixtral-2x2_8 (https://huggingface.co/mlabonne/phixtral-2x2_8, recommended) and mlabonne/phixtral-4x2_8 (https://huggingface.co/mlabonne/phixtral-4x2_8) in 4-bit precision. First, install the dependencies:

pip install -qqq --upgrade transformers einops accelerate bitsandbytes --progress-bar off

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "phixtral-2x2_8" # @param ["phixtral-2x2_8", "phixtral-4x2_8"]
instruction = "Write an epic poem about Ancient Rome." # @param {type:"string"}

prompt = f'''
<|im_start|>system
You are Phixtral, a helpful AI assistant.<|im_end|>
<|im_start|>user
{instruction}<|im_end|>
<|im_start|>assistant
'''

torch.set_default_device("cuda")

# Load the model and tokenizer

model = AutoModelForCausalLM.from_pretrained(
    f"mlabonne/{model_name}",
    torch_dtype="auto",
    load_in_4bit=True,
    trust_remote_code=True
)
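Passing load_in_4bit directly works on the transformers versions this post targets, but newer releases prefer an explicit quantization config. A minimal equivalent sketch, assuming a recent transformers and bitsandbytes install:

from transformers import BitsAndBytesConfig

# Explicit 4-bit quantization config, equivalent to load_in_4bit=True
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16
)
model = AutoModelForCausalLM.from_pretrained(
    f"mlabonne/{model_name}",
    torch_dtype="auto",
    quantization_config=quant_config,
    trust_remote_code=True
)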
tokenizer = AutoTokenizer.from_pretrained(
    f"mlabonne/{model_name}",
    trust_remote_code=True
)
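As an aside, if the checkpoint's tokenizer ships a chat template (an assumption; phixtral's custom tokenizer code may not define one), the ChatML prompt above can be built with apply_chat_template instead of hand-writing the special tokens:

messages = [
    {"role": "system", "content": "You are Phixtral, a helpful AI assistant."},
    {"role": "user", "content": instruction},
]
# tokenize=False returns the formatted string; add_generation_prompt appends
# the opening <|im_start|>assistant tag so the model continues from there
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)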

# Tokenize the input string

inputs = tokenizer(
    prompt,
    return_tensors="pt",
    return_attention_mask=False
)

# Generate text using the model

outputs = model.generate(**inputs, max_length=200)
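Note that max_length counts the prompt tokens, so a long instruction eats into the 200-token budget; max_new_tokens bounds only the completion. For interactive use, transformers' TextStreamer prints tokens as they are generated. A short sketch:

from transformers import TextStreamer

# Stream tokens to stdout as they arrive; skip_prompt hides the echoed input
streamer = TextStreamer(tokenizer, skip_prompt=True)
outputs = model.generate(**inputs, max_new_tokens=200, streamer=streamer)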

# Decode and print the output

text = tokenizer.batch_decode(outputs)[0]
print(text[len(prompt):])
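The slice strips the prompt, but the decoded text may still carry ChatML markers. A small cleanup sketch, assuming the model closes its turn with <|im_end|>:

# Trim at the first end-of-turn tag and drop surrounding whitespace
reply = text[len(prompt):].split("<|im_end|>")[0].strip()
print(reply)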
