by Spencer Schneider and Ari Wagen · Nov 5, 2025

Colors for a Large Wall, Ellsworth Kelly (1951)
A critical part of protein co-folding is handling multiple sequence alignments (MSAs). It's one of the most computationally expensive steps in protein-structure prediction, and efficiently computing MSAs requires specialized hardware that's often impractical or expensive for individual researchers to maintain.
Rowan hosts an MSA server to protect the confidential protein sequence information of our users, and we are now excited to make standalone MSA generation available to our subscribers via both our web GUI and Python API. The aim of our MSA workflow is to provide high-quality MSA information for downstream use with protein folding and co-folding models like Boltz-2, Chai-1, and Boltz-1.
When you use a public MSA server, you are sending all your protein sequence information to a third-party server with no privacy guarantees or contractual obligations. For any organization working with proprietary drug targets, novel enzymes, or sensitive partner data, this constitutes an untenable security risk.
By using Rowan's MSA workflow, your proprietary sequences never leave our secure, managed environment. Your data remains private, compliant, and protected. All data processed by Rowan is governed by our Terms of Service (unless your organization signs a separate service agreement).
Every co-folding model handles custom MSA input slightly differently. We designed our MSA workflow to be a simple, drop-in solution for using MSA with state-of-the-art models.
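At present, Rowan's MSA workflow supports these formats: ColabFold (`MSAFormat.COLABFOLD`), Chai (`MSAFormat.CHAI`), and Boltz (`MSAFormat.BOLTZ`), each of which is demonstrated in the examples below.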
If you use a model that ingests MSA information differently, please let us know! We want to make sure our MSA workflow integrates seamlessly with your existing co-folding scripts.
Integrating Rowan's MSA workflow into your existing protein-structure-prediction pipeline should be easy. Here are a few example scripts illustrating how to compute various MSA formats through Rowan's API.
import tarfile
from pathlib import Path
from stjames import MSAFormat
import rowan
# rowan.api_key = ""

# Directory that receives the downloaded archive and the extracted MSA files.
msa_directory = Path("msa_directory")

# Submit both sequences in a single workflow and request ColabFold-format output.
msa_workflow = rowan.submit_msa_workflow(
    initial_protein_sequences=[
        "VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGKKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVLTSKYR",
        "VHLTPEEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNPKVKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPENFRLLGNVLVCVLAHHFGKEFTPPVQAAYQKVVAGVANALAHKYH",
    ],
    output_formats=[MSAFormat.COLABFOLD],
    name="Colabfold Paired MSA Example",
)

# Block until the workflow has finished, then refresh the local object in place.
msa_workflow.wait_for_result().fetch_latest(in_place=True)
msa_workflow.download_msa_files(MSAFormat.COLABFOLD, path=msa_directory)

# The download is a .tar.gz archive: unpack it next to itself, then delete it.
tar_path = next(msa_directory.glob("*.tar.gz"))
with tarfile.open(tar_path, "r") as tar_ref:
    # filter="data" refuses archive members that would escape msa_directory
    # (absolute paths, "..", unsafe links) and avoids the DeprecationWarning
    # that an unfiltered extractall() raises on Python 3.12/3.13.
    tar_ref.extractall(msa_directory, filter="data")
tar_path.unlink()

# This produces two folders, one with unpaired msas called "unpaired"
# and one with paired msas called "paired"
import tarfile
from pathlib import Path
from chai_lab.chai1 import run_inference
from stjames import MSAFormat
import rowan
# rowan.api_key = "rowan-sk..."

# Single source of truth for the sequence: the FASTA handed to Chai and the
# sequence submitted for MSA generation are both built from this one string,
# so they cannot drift apart.
protein_sequence = "HPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRIDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPVAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW"
example_fasta = f">protein|name=example-protein\n{protein_sequence}\n"

fasta_path = Path("/tmp/input.fasta")
fasta_path.write_text(example_fasta)

output_dir = Path("/tmp/outputs")
msa_directory = Path("msa_directory")

# Request Chai-format MSA output for the sequence above.
msa_workflow = rowan.submit_msa_workflow(
    initial_protein_sequences=[protein_sequence],
    output_formats=[MSAFormat.CHAI],
    name="CHAI MSA Example",
)

# Block until the workflow has finished, then refresh the local object in place.
msa_workflow.wait_for_result().fetch_latest(in_place=True)
msa_workflow.download_msa_files(MSAFormat.CHAI, path=msa_directory)

# The download is a .tar.gz archive: unpack it next to itself, then delete it.
tar_path = next(msa_directory.glob("*.tar.gz"))
with tarfile.open(tar_path, "r") as tar_ref:
    # filter="data" refuses archive members that would escape msa_directory
    # (absolute paths, "..", unsafe links) and avoids the DeprecationWarning
    # that an unfiltered extractall() raises on Python 3.12/3.13.
    tar_ref.extractall(msa_directory, filter="data")
tar_path.unlink()

# Run Chai with the precomputed MSAs; the public MSA and template servers
# are explicitly disabled so only the files in msa_directory are used.
run_inference(
    fasta_file=fasta_path,
    output_dir=output_dir,
    num_trunk_recycles=3,
    num_diffn_timesteps=200,
    seed=42,
    device="cpu",  # or "cuda:0"
    use_esm_embeddings=True,
    use_msa_server=False,
    use_templates_server=False,
    msa_directory=msa_directory,
)
This example shows how Rowan-generated MSAs can be used with Boltz, going all the way from inputs to a predicted .cif file.
This script was run in a uv environment with the following pyproject.toml:
[project]
name = "rowan-msa-boltz"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
"boltz>=2.2.1",
"rowan-python>=2.1.8",
]
Here's our script for using Rowan MSAs with Boltz-2:
import subprocess
import tarfile
import yaml
from pathlib import Path
import rowan
from stjames import MSAFormat
rowan.api_key = "rowan-sk..." # your API key here
def _msa_sort_key(path: Path) -> tuple[float, str]:
    """Sort key that orders ``seq_{index}.csv`` files by their numeric index."""
    index = path.stem.rpartition("_")[2]
    # Files without a numeric suffix sort last (by name) instead of raising.
    return (int(index) if index.isdecimal() else float("inf"), path.name)


def run_boltz(name: str, protein_sequences: list[str]) -> None:
    """Generate MSAs with Rowan, write a Boltz input YAML, and run ``boltz predict``.

    Args:
        name: Name of the Rowan workflow. Also used for the input file
            (``{name}.yaml``) and the MSA directory (``msa/{name}``).
        protein_sequences: Protein sequences, one per chain. Chains receive
            the ids ``A``, ``B``, ... in input order.

    Raises:
        ValueError: If the number of extracted MSA CSV files does not match
            the number of input sequences.
        subprocess.CalledProcessError: If ``boltz predict`` exits non-zero.
    """
    input_yaml = Path(f"{name}.yaml")
    output_dir = Path("out")
    msa_dir = Path(f"msa/{name}")

    # generate MSAs
    print("Submitting MSA workflow...")
    msa_workflow = rowan.submit_msa_workflow(
        initial_protein_sequences=protein_sequences,
        output_formats=[MSAFormat.BOLTZ],
        name=name,
    )
    print("Waiting for MSA results...")
    msa_workflow.wait_for_result().fetch_latest(in_place=True)
    print("Downloading MSA files...")
    msa_workflow.download_msa_files(MSAFormat.BOLTZ, path=msa_dir)

    # extract .tar.gz
    tar_path = next(msa_dir.glob("*.tar.gz"))
    print(f"Extracting {tar_path}...")
    with tarfile.open(tar_path, "r:gz") as tar_ref:
        tar_ref.extractall(msa_dir, filter="data")
    tar_path.unlink()

    # Sort numerically: a plain lexicographic sort puts seq_10.csv before
    # seq_2.csv and would pair MSAs with the wrong chains for >10 sequences.
    csvs = sorted(msa_dir.glob("*.csv"), key=_msa_sort_key)
    if len(csvs) != len(protein_sequences):
        # zip() would silently drop chains here; fail loudly instead.
        raise ValueError(
            f"Expected {len(protein_sequences)} MSA CSV files in {msa_dir}, "
            f"found {len(csvs)}"
        )

    data = {
        "sequences": [
            {"protein": {"id": chr(65 + i), "sequence": s, "msa": str(f)}}
            for i, (s, f) in enumerate(zip(protein_sequences, csvs))
        ]
    }
    # Context manager guarantees the YAML is flushed and closed before Boltz reads it.
    with open(input_yaml, "w") as yaml_file:
        yaml.safe_dump(data, yaml_file, sort_keys=False)
    print(f"Wrote YAML to {input_yaml.resolve()}")

    # run boltz
    print("Running Boltz prediction...")
    cmd = ["boltz", "predict", str(input_yaml), "--out_dir", str(output_dir)]
    subprocess.run(cmd, check=True)
    print("Done!")
if __name__ == "__main__":
    # Two-chain example input: one sequence per chain, in the order the
    # chains should appear in the prediction.
    complex_sequences = [
        "AQVINTFDGVADYLQTYHKLPDNYITKSEAQALGWVASKGNLADVAPGKSIGGDIFSNREGKLPGKSGRTWREADINYTSGFRNSDRILYSSDWLIYATTDHYQTFTKIR",
        "MKKAVINGEQIRSISDLHQTLKKELALPEYYGENLDALWAALTGWVEYPLVLEWRQFEQSKQLTENGAESVLQVFREAKAEGADITIILS",
    ]
    run_boltz(
        name="barnase–barstar complex",
        protein_sequences=complex_sequences,
    )
Boltz handles MSAs differently than Chai does. The files returned for use with Boltz will be named seq_{index}.csv where the sequences are zero-indexed (e.g. seq_0.csv). These output indices will match the order of the inputs supplied to Rowan's MSA workflow. To input these MSAs, the file names need to be matched up to the sequences in the Boltz input YAML. For example, a YAML for Boltz should look like (the above script handles this):
sequences:
  - protein:
      id: A
      sequence: VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGKKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVLTSKYR
      msa: /path/to/msa_directory/seq_0.csv
  - protein:
      id: B
      sequence: VHLTPEEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNPKVKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPENFRLLGNVLVCVLAHHFGKEFTPPVQAAYQKVVAGVANALAHKYH
      msa: /path/to/msa_directory/seq_1.csv
