Skip to main content

dicom_viewer/dcm/
loader.rs

1//! Folder scanning and DICOM metadata extraction.
2//!
3//! Reads each file with `OpenFileOptions::read_until(PIXEL_DATA)` so we
4//! never pay the cost of decoding pixels just to populate the study tree.
5
6use anyhow::{Context, Result};
7use dicom::object::{OpenFileOptions, ReadError};
8use dicom_dictionary_std::tags;
9use rayon::prelude::*;
10use std::collections::BTreeMap;
11use std::path::{Path, PathBuf};
12use tracing::{debug, info, warn};
13
14use super::study::{Instance, Series, Study};
15
16/// Scan a folder recursively for DICOM files and group them by study/series.
17pub fn load_folder(root: &Path) -> Result<Vec<Study>> {
18    let started = std::time::Instant::now();
19    let paths = collect_candidate_files(root)?;
20    info!(count = paths.len(), root = %root.display(), "scanning folder");
21
22    let parsed: Vec<Instance> = paths
23        .par_iter()
24        .filter_map(|p| match parse_instance(p) {
25            Ok(inst) => Some(inst),
26            Err(e) => {
27                debug!(path = %p.display(), error = %e, "skip file");
28                None
29            }
30        })
31        .collect();
32
33    let studies = group_into_studies(parsed);
34    info!(
35        studies = studies.len(),
36        elapsed_ms = started.elapsed().as_millis() as u64,
37        "folder scan complete"
38    );
39    Ok(studies)
40}
41
42fn collect_candidate_files(root: &Path) -> Result<Vec<PathBuf>> {
43    let mut out = Vec::new();
44    walk(root, &mut out)?;
45    Ok(out)
46}
47
48fn walk(dir: &Path, out: &mut Vec<PathBuf>) -> Result<()> {
49    let md = std::fs::metadata(dir).with_context(|| format!("stat {}", dir.display()))?;
50    if md.is_file() {
51        if looks_like_dicom(dir) {
52            out.push(dir.to_path_buf());
53        }
54        return Ok(());
55    }
56    // nosemgrep: path-traversal — `dir` is a folder the user explicitly
57    // selected via File ▸ Open Folder or supplied on the CLI. Symlinks can't
58    // be used to escape the tree because `DirEntry::file_type()` reports
59    // symlinks as neither `is_file` nor `is_dir`, so the branches below
60    // skip them before any file is opened.
61    for entry in std::fs::read_dir(dir).with_context(|| format!("read_dir {}", dir.display()))? {
62        let entry = entry?;
63        let path = entry.path();
64        let ft = entry.file_type()?;
65        if ft.is_dir() {
66            walk(&path, out)?;
67        } else if ft.is_file() && looks_like_dicom(&path) {
68            out.push(path);
69        }
70    }
71    Ok(())
72}
73
74fn looks_like_dicom(p: &Path) -> bool {
75    // Accept .dcm explicitly; otherwise accept files with no extension
76    // (common on CDs) and let the parser reject non-DICOM content.
77    match p.extension().and_then(|e| e.to_str()) {
78        Some(ext) => ext.eq_ignore_ascii_case("dcm") || ext.eq_ignore_ascii_case("dicom"),
79        None => {
80            let name = p.file_name().and_then(|n| n.to_str()).unwrap_or("");
81            !name.starts_with('.') && name != "DICOMDIR" && {
82                // Heuristic: probe the DICOM "DICM" magic at byte 128.
83                file_has_dicm_magic(p).unwrap_or(false)
84            }
85        }
86    }
87}
88
/// Reports whether the file at `p` holds the ASCII bytes `DICM` at byte
/// offset 128.
///
/// Files shorter than 132 bytes yield `Ok(false)` without being read.
///
/// # Errors
///
/// Propagates any I/O error from opening, stat'ing, seeking or reading.
fn file_has_dicm_magic(p: &Path) -> std::io::Result<bool> {
    use std::io::{Read, Seek, SeekFrom};

    const MAGIC_OFFSET: u64 = 128;
    const MAGIC: &[u8; 4] = b"DICM";

    // nosemgrep: path-traversal — `p` is a regular file already vetted by
    // `walk` (symlinks pre-filtered, see comment there). The probe reads
    // exactly 4 bytes at offset 128 and returns a bool; no path-controlled
    // content is reflected back or written anywhere.
    let mut file = std::fs::File::open(p)?;
    let too_short = file.metadata()?.len() < MAGIC_OFFSET + MAGIC.len() as u64;
    if too_short {
        return Ok(false);
    }

    let mut magic = [0u8; 4];
    file.seek(SeekFrom::Start(MAGIC_OFFSET))?;
    file.read_exact(&mut magic)?;
    Ok(&magic == MAGIC)
}
104
105fn parse_instance(path: &Path) -> Result<Instance, ReadError> {
106    let obj = OpenFileOptions::new()
107        .read_until(tags::PIXEL_DATA)
108        .open_file(path)?;
109
110    let take_str = |tag| {
111        obj.element(tag)
112            .ok()
113            .and_then(|e| e.to_str().ok())
114            .map(|s| s.trim_end_matches('\0').trim().to_string())
115            .unwrap_or_default()
116    };
117    let take_opt_str = |tag| {
118        obj.element(tag)
119            .ok()
120            .and_then(|e| e.to_str().ok())
121            .map(|s| s.trim_end_matches('\0').trim().to_string())
122    };
123    let take_f64 = |tag| obj.element(tag).ok().and_then(|e| e.to_float64().ok());
124    let take_int = |tag, default: i32| {
125        obj.element(tag)
126            .ok()
127            .and_then(|e| e.to_int::<i32>().ok())
128            .unwrap_or(default)
129    };
130    let take_u16 = |tag, default: u16| {
131        obj.element(tag)
132            .ok()
133            .and_then(|e| e.to_int::<u16>().ok())
134            .unwrap_or(default)
135    };
136
137    let pixel_spacing = obj
138        .element(tags::PIXEL_SPACING)
139        .ok()
140        .and_then(|e| e.to_multi_float64().ok())
141        .and_then(|v| {
142            if v.len() >= 2 {
143                Some((v[0], v[1]))
144            } else {
145                None
146            }
147        });
148
149    let image_position_z = obj
150        .element(tags::IMAGE_POSITION_PATIENT)
151        .ok()
152        .and_then(|e| e.to_multi_float64().ok())
153        .and_then(|v| v.get(2).copied());
154
155    Ok(Instance {
156        path: path.to_path_buf(),
157        sop_instance_uid: take_str(tags::SOP_INSTANCE_UID),
158        instance_number: take_int(tags::INSTANCE_NUMBER, 0),
159        rows: take_u16(tags::ROWS, 0),
160        cols: take_u16(tags::COLUMNS, 0),
161        modality: take_str(tags::MODALITY),
162        photometric: take_str(tags::PHOTOMETRIC_INTERPRETATION),
163        window_center: take_f64(tags::WINDOW_CENTER),
164        window_width: take_f64(tags::WINDOW_WIDTH),
165        rescale_slope: take_f64(tags::RESCALE_SLOPE).unwrap_or(1.0),
166        rescale_intercept: take_f64(tags::RESCALE_INTERCEPT).unwrap_or(0.0),
167        pixel_spacing,
168        view_position: take_opt_str(tags::VIEW_POSITION),
169        image_laterality: take_opt_str(tags::IMAGE_LATERALITY),
170        image_position_z,
171    })
172}
173
174fn group_into_studies(instances: Vec<Instance>) -> Vec<Study> {
175    // study UID -> series UID -> Vec<Instance>
176    let mut by_study: BTreeMap<String, BTreeMap<String, Vec<Instance>>> = BTreeMap::new();
177    let mut study_meta: BTreeMap<String, StudyMeta> = BTreeMap::new();
178    let mut series_meta: BTreeMap<(String, String), SeriesMeta> = BTreeMap::new();
179
180    for inst in instances {
181        let study_uid = read_meta_tag(&inst.path, tags::STUDY_INSTANCE_UID)
182            .unwrap_or_else(|| "(unknown study)".into());
183        let series_uid = read_meta_tag(&inst.path, tags::SERIES_INSTANCE_UID)
184            .unwrap_or_else(|| "(unknown series)".into());
185
186        study_meta
187            .entry(study_uid.clone())
188            .or_insert_with(|| StudyMeta::read(&inst.path));
189        series_meta
190            .entry((study_uid.clone(), series_uid.clone()))
191            .or_insert_with(|| SeriesMeta::read(&inst.path));
192
193        by_study
194            .entry(study_uid)
195            .or_default()
196            .entry(series_uid)
197            .or_default()
198            .push(inst);
199    }
200
201    let mut studies: Vec<Study> = Vec::with_capacity(by_study.len());
202    for (study_uid, series_map) in by_study {
203        let meta = study_meta.remove(&study_uid).unwrap_or_default();
204        let mut series_vec: Vec<Series> = Vec::with_capacity(series_map.len());
205        let mut modalities = Vec::<String>::new();
206        for (series_uid, mut insts) in series_map {
207            sort_slices(&mut insts);
208            let smeta = series_meta
209                .remove(&(study_uid.clone(), series_uid.clone()))
210                .unwrap_or_default();
211            if !smeta.modality.is_empty() && !modalities.contains(&smeta.modality) {
212                modalities.push(smeta.modality.clone());
213            }
214            series_vec.push(Series {
215                series_instance_uid: series_uid,
216                series_number: smeta.series_number,
217                modality: smeta.modality,
218                description: smeta.description,
219                instances: insts,
220            });
221        }
222        series_vec.sort_by_key(|s| s.series_number);
223
224        studies.push(Study {
225            study_instance_uid: study_uid,
226            patient_name: meta.patient_name,
227            patient_id: meta.patient_id,
228            study_date: meta.study_date,
229            study_description: meta.study_description,
230            modalities,
231            series: series_vec,
232        });
233    }
234
235    // Newest studies first — radiologists scan top-down.
236    studies.sort_by(|a, b| b.study_date.cmp(&a.study_date));
237
238    if studies.is_empty() {
239        warn!("no DICOM files parsed");
240    }
241    studies
242}
243
244/// Order slices the way a radiologist expects to scroll them. When every
245/// instance carries ImagePositionPatient, sort by z — anatomically correct.
246/// Otherwise fall back to InstanceNumber. Stable ties are broken by
247/// InstanceNumber so duplicate-z slices stay deterministic.
248fn sort_slices(insts: &mut [Instance]) {
249    let all_have_z = !insts.is_empty() && insts.iter().all(|i| i.image_position_z.is_some());
250    if all_have_z {
251        insts.sort_by(|a, b| {
252            let az = a.image_position_z.unwrap_or(0.0);
253            let bz = b.image_position_z.unwrap_or(0.0);
254            az.partial_cmp(&bz)
255                .unwrap_or(std::cmp::Ordering::Equal)
256                .then_with(|| a.instance_number.cmp(&b.instance_number))
257        });
258    } else {
259        insts.sort_by_key(|i| i.instance_number);
260    }
261}
262
263#[derive(Default)]
264struct StudyMeta {
265    patient_name: String,
266    patient_id: String,
267    study_date: String,
268    study_description: String,
269}
270
271impl StudyMeta {
272    fn read(path: &Path) -> Self {
273        let obj = match OpenFileOptions::new()
274            .read_until(tags::PIXEL_DATA)
275            .open_file(path)
276        {
277            Ok(o) => o,
278            Err(_) => return Self::default(),
279        };
280        let s = |tag| {
281            obj.element(tag)
282                .ok()
283                .and_then(|e| e.to_str().ok())
284                .map(|s| s.trim_end_matches('\0').trim().to_string())
285                .unwrap_or_default()
286        };
287        Self {
288            patient_name: s(tags::PATIENT_NAME),
289            patient_id: s(tags::PATIENT_ID),
290            study_date: s(tags::STUDY_DATE),
291            study_description: s(tags::STUDY_DESCRIPTION),
292        }
293    }
294}
295
296#[derive(Default)]
297struct SeriesMeta {
298    modality: String,
299    description: String,
300    series_number: i32,
301}
302
303impl SeriesMeta {
304    fn read(path: &Path) -> Self {
305        let obj = match OpenFileOptions::new()
306            .read_until(tags::PIXEL_DATA)
307            .open_file(path)
308        {
309            Ok(o) => o,
310            Err(_) => return Self::default(),
311        };
312        Self {
313            modality: obj
314                .element(tags::MODALITY)
315                .ok()
316                .and_then(|e| e.to_str().ok())
317                .map(|s| s.trim_end_matches('\0').trim().to_string())
318                .unwrap_or_default(),
319            description: obj
320                .element(tags::SERIES_DESCRIPTION)
321                .ok()
322                .and_then(|e| e.to_str().ok())
323                .map(|s| s.trim_end_matches('\0').trim().to_string())
324                .unwrap_or_default(),
325            series_number: obj
326                .element(tags::SERIES_NUMBER)
327                .ok()
328                .and_then(|e| e.to_int::<i32>().ok())
329                .unwrap_or(0),
330        }
331    }
332}
333
334fn read_meta_tag(path: &Path, tag: dicom::core::Tag) -> Option<String> {
335    let obj = OpenFileOptions::new()
336        .read_until(tags::PIXEL_DATA)
337        .open_file(path)
338        .ok()?;
339    obj.element(tag)
340        .ok()
341        .and_then(|e| e.to_str().ok())
342        .map(|s| s.trim_end_matches('\0').trim().to_string())
343}