# A Coding Implementation for Building and Analyzing Crystal Structures Using Pymatgen: Symmetry Analysis, Phase Diagrams, Surface Generation, and Materials Project Integration
# Build a disordered alloy: a single site at the origin of a cubic cell
# (lattice parameter 3.6) shared 50/50 by Cu and Au, then expand it to a
# 2x2x2 supercell (8 half-occupied sites).
disordered = Structure(
    Lattice.cubic(3.6),
    [{"Cu": 0.5, "Au": 0.5}],
    [[0, 0, 0]],
)
disordered.make_supercell([2, 2, 2])
print("Disordered composition:", disordered.composition)

# Enumerating ordered arrangements is treated as best-effort: any failure in
# this step is reported and the script carries on with the next section.
try:
    # Decorate a copy so `disordered` itself keeps its plain elements.
    # NOTE(review): oxidation states are presumably needed by the ordering
    # transformation's ranking -- confirm against the pymatgen docs.
    disordered_oxi = disordered.copy()
    disordered_oxi.add_oxidation_state_by_element({"Cu": 1, "Au": 1})

    ordered_transform = OrderDisorderedStructureTransformation()
    # Ask for up to three ranked candidates; each entry is indexed by
    # "structure" below.
    ordered_candidates = ordered_transform.apply_transformation(
        disordered_oxi,
        return_ranked_list=3,
    )

    for idx, cand in enumerate(ordered_candidates, start=1):
        # Strip the oxidation states again so the printed formula shows
        # plain elements.
        s = cand["structure"].copy()
        s.remove_oxidation_states()
        print(f"Ordered candidate {idx}: formula={s.composition.formula}, sites={len(s)}")
except Exception as e:
    print("Ordering step skipped due to transformation issue:", e)
header("12. MOLECULE SUPPORT")

# A three-atom molecule given by species and Cartesian coordinates:
# O at the origin and two H atoms mirrored in x.
water = Molecule(
    ["O", "H", "H"],
    [
        [0.0, 0.0, 0.0],
        [0.7586, 0.0, 0.5043],
        [-0.7586, 0.0, 0.5043],
    ],
)

print("Water formula:", water.composition.formula)
print("Water center of mass:", np.round(water.center_of_mass, 4))
# Distances from site 0 (O) to sites 1 and 2 (the two H atoms).
print(
    "O-H bond lengths:",
    round(water.get_distance(0, 1), 4),
    round(water.get_distance(0, 2), 4),
)
header("13. CIF EXPORT")

# All artifacts of this script go under one directory; create it if needed.
output_dir = "/content/pymatgen_tutorial_outputs"
os.makedirs(output_dir, exist_ok=True)

# Target paths are kept in named variables because the summary section
# later in the script records them.
si_cif = os.path.join(output_dir, "si.cif")
nacl_cif = os.path.join(output_dir, "nacl.cif")
slab_cif = os.path.join(output_dir, "si_111_slab.cif")

# Write every structure first, then report the saved paths.
CifWriter(si).write_file(si_cif)
CifWriter(nacl).write_file(nacl_cif)
CifWriter(slab).write_file(slab_cif)

print("Saved:", si_cif)
print("Saved:", nacl_cif)
print("Saved:", slab_cif)
header("14. DATAFRAME SUMMARY")

# One row per structure: size, volume, density and detected space group.
rows = []
for name, s in [
    ("Si", si),
    ("NaCl", nacl),
    ("LiFePO4-like", li_fe_po4),
    ("Si slab", slab),
]:
    # Symmetry detection uses a tolerance of symprec=0.1 for every structure.
    sga = SpacegroupAnalyzer(s, symprec=0.1)
    rows.append(
        {
            "name": name,
            "formula": s.composition.reduced_formula,
            "sites": len(s),
            "volume_A3": round(s.volume, 4),
            # float() converts the density value to a plain Python float
            # before rounding.
            "density_g_cm3": round(float(s.density), 4),
            "spacegroup": sga.get_space_group_symbol(),
            "sg_number": sga.get_space_group_number(),
        }
    )

df = pd.DataFrame(rows)
print(df)
header("15. OPTIONAL MATERIALS PROJECT API ACCESS")

# Look for an API key in two places: a Colab secret first, then the
# MP_API_KEY environment variable.
mp_api_key = None
try:
    from google.colab import userdata

    mp_api_key = userdata.get("MP_API_KEY")
except Exception:
    # Deliberately silent: if the Colab import or the secret lookup fails
    # for any reason, fall through to the environment variable below.
    pass

if not mp_api_key:
    mp_api_key = os.environ.get("MP_API_KEY")

if mp_api_key:
    # The live query is best-effort: any failure is reported and the
    # script continues.
    try:
        from pymatgen.ext.matproj import MPRester

        with MPRester(mp_api_key) as mpr:
            mp_struct = mpr.get_structure_by_material_id("mp-149")
            summary_docs = mpr.summary.search(
                material_ids=["mp-149"],
                fields=[
                    "material_id",
                    "formula_pretty",
                    "band_gap",
                    "energy_above_hull",
                    "is_stable",
                ],
            )

        print("Fetched mp-149 from Materials Project")
        print("Formula:", mp_struct.composition.reduced_formula)
        print("Sites:", len(mp_struct))

        # Only the first returned summary document is shown.
        if summary_docs:
            doc = summary_docs[0]
            print(
                {
                    "material_id": str(doc.material_id),
                    "formula_pretty": doc.formula_pretty,
                    "band_gap": doc.band_gap,
                    "energy_above_hull": doc.energy_above_hull,
                    "is_stable": doc.is_stable,
                }
            )
    except Exception as e:
        print("Materials Project API section skipped due to runtime/API issue:", e)
else:
    print("No MP_API_KEY found. Skipping live Materials Project query.")
    print("In Colab, add a secret named MP_API_KEY or set os.environ['MP_API_KEY'].")
header("16. SAVE SUMMARY JSON")

# Gather per-structure facts, the phase-diagram result and the CIF paths
# into one JSON-serializable dictionary.
# NOTE(review): `target` and `e_above_hull` come from an earlier section of
# the script that is not visible here.
summary = {
    "structures": {
        # The same three fields are recorded for each bulk structure.
        label: {
            "formula": struct.composition.reduced_formula,
            "sites": len(struct),
            "spacegroup": SpacegroupAnalyzer(struct, symprec=0.1).get_space_group_symbol(),
        }
        for label, struct in (
            ("Si", si),
            ("NaCl", nacl),
            ("LiFePO4-like", li_fe_po4),
        )
    },
    "phase_diagram": {
        "target": target.composition.reduced_formula,
        # float() yields a plain Python float for json.dump.
        "energy_above_hull_eV_atom": float(e_above_hull),
    },
    "files": {
        "si_cif": si_cif,
        "nacl_cif": nacl_cif,
        "slab_cif": slab_cif,
    },
}

json_path = os.path.join(output_dir, "summary.json")
# Explicit encoding so the output does not depend on the platform default.
with open(json_path, "w", encoding="utf-8") as f:
    json.dump(summary, f, indent=2)
print("Saved:", json_path)
header("17. FINAL NOTES")

# Closing messages: confirm completion and point at the output directory.
print("Tutorial completed successfully.")
print("Artifacts are saved in:", output_dir)
print("You can now extend this notebook to parse VASP outputs, query MP at scale, or build defect/workflow pipelines.")


