%md
## Modify Qwen

This modifies a Qwen 2.5 model so that it is consistent with the Llama architecture: it injects the zeroed-out attention biases that Llama expects but Qwen omits, and rewrites the config and tokenizer files so the result can be loaded as a Llama model.
%pip install -U mlflow transformers==4.44.2 torch==2.3.0 accelerate==0.29.0
dbutils.library.restartPython()
import json
import os
import shutil
from collections import OrderedDict
from pathlib import Path

import huggingface_hub
import torch
from safetensors import safe_open
from safetensors.torch import save_file
from transformers.modeling_utils import (
    SAFE_WEIGHTS_INDEX_NAME,
    SAFE_WEIGHTS_NAME,
    shard_checkpoint,
)

# The model to download from Hugging Face
dbutils.widgets.text("model_name", "Qwen/Qwen2.5-Coder-7B")

# Root path where models will be stored
dbutils.widgets.text("dbfs_root_target", "/dbfs/models")
# Define a function to convert and save Qwen weights
def save_weight(input_dir: str, output_dir: str, shard_size: str = "2GB") -> None:
    """
    Copies a Qwen model in the input directory to a Llama-compatible version of it
    in the output directory. Injects zeroed-out bias vectors in attention layers
    where needed in order to make it compatible with the Llama architecture.
    """
    # Load the Qwen state dict from the .safetensors files
    qwen_state_dict = OrderedDict()
    for filepath in os.listdir(input_dir):
        if filepath.endswith(".safetensors"):
            full_path = os.path.join(input_dir, filepath)
            with safe_open(full_path, framework="pt", device="cpu") as sf:
                for key in sf.keys():
                    qwen_state_dict[key] = sf.get_tensor(key)

    # Copy tensors and inject bias where needed to match Llama
    llama_state_dict = OrderedDict()
    for key, value in qwen_state_dict.items():
        llama_state_dict[key] = value
        # Qwen omits the bias on attn.o_proj; Llama expects it
        if "attn.o_proj.weight" in key:
            # Each attn.o_proj.weight needs an associated bias in order to be
            # compatible with the Llama architecture. Since Qwen doesn't use this,
            # we insert zeroed-out vectors.
            bias_key = key.replace("attn.o_proj.weight", "attn.o_proj.bias")
            llama_state_dict[bias_key] = torch.zeros_like(value[:, 0]).squeeze()

    # Save the weights as sharded safetensors files, plus an index if sharded
    shards, index = shard_checkpoint(
        llama_state_dict, max_shard_size=shard_size, weights_name=SAFE_WEIGHTS_NAME
    )
    for shard_file, shard_data in shards.items():
        save_path = os.path.join(output_dir, shard_file)
        save_file(shard_data, save_path, metadata={"format": "pt"})
    if index is not None:
        index_path = os.path.join(output_dir, SAFE_WEIGHTS_INDEX_NAME)
        with open(index_path, "w", encoding="utf-8") as f:
            json.dump(index, f, indent=2, sort_keys=True)
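# Quick illustration (a standalone sketch, not part of the conversion): the bias
# injected above is a zero vector with one entry per output row of the o_proj
# weight matrix. The toy 8x8 tensor here stands in for an attn.o_proj.weight of
# shape (hidden_size, hidden_size).
toy_weight = torch.randn(8, 8)
toy_bias = torch.zeros_like(toy_weight[:, 0]).squeeze()
assert toy_bias.shape == (toy_weight.shape[0],)
assert torch.count_nonzero(toy_bias) == 0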
# Define a function to update the Qwen config and tokenizer files
def save_configs(input_dir: str, output_dir: str) -> None:
    """
    Copies Qwen config and tokenizer files to output_dir, removing Qwen-specific
    fields and making them compatible with the Llama architecture.
    """
    config_name = "config.json"

    # Load the Qwen config.json
    with open(os.path.join(input_dir, config_name), encoding="utf-8") as f:
        qwen_config_dict = json.load(f)

    # Modify the Qwen config to look like a Llama model
    llama_config_dict = {**qwen_config_dict}
    llama_config_dict["architectures"] = ["LlamaForCausalLM"]  # now it's Llama 8-)
    llama_config_dict["model_type"] = "llama"
    llama_config_dict["attention_bias"] = True  # Llama-specific
    llama_config_dict["mlp_bias"] = False
    llama_config_dict["pretraining_tp"] = 0

    # Remove Qwen-specific fields related to the sliding window
    for del_key in ["sliding_window", "use_sliding_window", "max_window_layers"]:
        if del_key in llama_config_dict:
            del llama_config_dict[del_key]

    # Write the updated config to the new directory
    with open(os.path.join(output_dir, config_name), "w", encoding="utf-8") as f:
        json.dump(llama_config_dict, f, indent=2)

    # Copy other relevant files (tokenizer, merges, vocab, and so on)
    additional_files = [
        "generation_config.json",
        "merges.txt",
        "tokenizer.json",
        "tokenizer_config.json",
        "vocab.json",
    ]
    for fname in additional_files:
        src = os.path.join(input_dir, fname)
        dst = os.path.join(output_dir, fname)
        if os.path.exists(src):
            shutil.copyfile(src, dst)
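# Illustration only: the same field rewrites applied to a minimal, hypothetical
# stand-in for a Qwen config (the field values here are made up), so the shape
# of the change is visible without touching any files.
toy_qwen_config = {
    "architectures": ["Qwen2ForCausalLM"],
    "model_type": "qwen2",
    "sliding_window": 131072,
}
toy_llama_config = {**toy_qwen_config}
toy_llama_config.update(
    architectures=["LlamaForCausalLM"],
    model_type="llama",
    attention_bias=True,
    mlp_bias=False,
    pretraining_tp=0,
)
for del_key in ["sliding_window", "use_sliding_window", "max_window_layers"]:
    toy_llama_config.pop(del_key, None)
print(json.dumps(toy_llama_config, indent=2))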
# Master function to orchestrate the Qwen → Llama conversion
def llamafy_qwen(input_dir: str, output_dir: str) -> None:
    """
    Converts Qwen2.5 into a Llama-like architecture by rewriting weights and
    configs. After this step, the resulting folder can be treated as if it were
    a Llama model.
    """
    os.makedirs(output_dir, exist_ok=False)

    # Rewrite the Qwen weights, adding the missing attention biases
    save_weight(input_dir, output_dir)

    # Update the config to make it a Llama model and copy the other files
    save_configs(input_dir, output_dir)

    print(f"Successfully converted Qwen from '{input_dir}' to Llama format at '{output_dir}'.")
model_name = dbutils.widgets.get("model_name")
dbfs_root_target = dbutils.widgets.get("dbfs_root_target")
assert model_name
assert dbfs_root_target

target_dbfs_path = os.path.join(dbfs_root_target, model_name)
target_dbfs_modified_path = f"{target_dbfs_path}-Llama"

if not os.path.exists(Path(target_dbfs_path).parent):
    os.makedirs(Path(target_dbfs_path).parent)

if not os.path.exists(target_dbfs_path):
    print(f"Downloading to {target_dbfs_path}")
    huggingface_hub.snapshot_download(model_name, local_dir=target_dbfs_path)
else:
    print(f"Already exists: {target_dbfs_path}")
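# Lightweight check (illustrative): the conversion step below expects the
# downloaded snapshot to contain .safetensors weight shards plus config.json,
# so list what actually arrived before converting.
print(sorted(os.listdir(target_dbfs_path)))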
if not os.path.exists(target_dbfs_modified_path):
    print(f"Modifying Qwen model and writing to {target_dbfs_modified_path}")
    llamafy_qwen(target_dbfs_path, target_dbfs_modified_path)
else:
    print(f"Already exists: {target_dbfs_modified_path}")
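# Optional sanity check (a sketch; assumes the driver has enough memory to hold
# the model, and the prompt and dtype are illustrative): load the converted
# folder with the Llama classes to confirm it is now Llama-compatible.
from transformers import AutoTokenizer, LlamaForCausalLM

tokenizer = AutoTokenizer.from_pretrained(target_dbfs_modified_path)
model = LlamaForCausalLM.from_pretrained(
    target_dbfs_modified_path, torch_dtype=torch.bfloat16
)
inputs = tokenizer("def fibonacci(n):", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=16)
print(tokenizer.decode(outputs[0]))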