1use anyhow::{Context, Result};
7use dicom::object::{OpenFileOptions, ReadError};
8use dicom_dictionary_std::tags;
9use rayon::prelude::*;
10use std::collections::BTreeMap;
11use std::path::{Path, PathBuf};
12use tracing::{debug, info, warn};
13
14use super::study::{Instance, Series, Study};
15
16pub fn load_folder(root: &Path) -> Result<Vec<Study>> {
18 let started = std::time::Instant::now();
19 let paths = collect_candidate_files(root)?;
20 info!(count = paths.len(), root = %root.display(), "scanning folder");
21
22 let parsed: Vec<Instance> = paths
23 .par_iter()
24 .filter_map(|p| match parse_instance(p) {
25 Ok(inst) => Some(inst),
26 Err(e) => {
27 debug!(path = %p.display(), error = %e, "skip file");
28 None
29 }
30 })
31 .collect();
32
33 let studies = group_into_studies(parsed);
34 info!(
35 studies = studies.len(),
36 elapsed_ms = started.elapsed().as_millis() as u64,
37 "folder scan complete"
38 );
39 Ok(studies)
40}
41
42fn collect_candidate_files(root: &Path) -> Result<Vec<PathBuf>> {
43 let mut out = Vec::new();
44 walk(root, &mut out)?;
45 Ok(out)
46}
47
48fn walk(dir: &Path, out: &mut Vec<PathBuf>) -> Result<()> {
49 let md = std::fs::metadata(dir).with_context(|| format!("stat {}", dir.display()))?;
50 if md.is_file() {
51 if looks_like_dicom(dir) {
52 out.push(dir.to_path_buf());
53 }
54 return Ok(());
55 }
56 for entry in std::fs::read_dir(dir).with_context(|| format!("read_dir {}", dir.display()))? {
62 let entry = entry?;
63 let path = entry.path();
64 let ft = entry.file_type()?;
65 if ft.is_dir() {
66 walk(&path, out)?;
67 } else if ft.is_file() && looks_like_dicom(&path) {
68 out.push(path);
69 }
70 }
71 Ok(())
72}
73
74fn looks_like_dicom(p: &Path) -> bool {
75 match p.extension().and_then(|e| e.to_str()) {
78 Some(ext) => ext.eq_ignore_ascii_case("dcm") || ext.eq_ignore_ascii_case("dicom"),
79 None => {
80 let name = p.file_name().and_then(|n| n.to_str()).unwrap_or("");
81 !name.starts_with('.') && name != "DICOMDIR" && {
82 file_has_dicm_magic(p).unwrap_or(false)
84 }
85 }
86 }
87}
88
/// Reports whether the four bytes at offset 128 of the file at `p` are `DICM`.
///
/// A file shorter than 132 bytes cannot hold the marker and yields `Ok(false)`.
///
/// # Errors
///
/// Returns the underlying I/O error when the file cannot be opened, stat'ed, seeked or
/// read.
fn file_has_dicm_magic(p: &Path) -> std::io::Result<bool> {
    use std::io::{Read, Seek, SeekFrom};

    const PREAMBLE_LEN: u64 = 128;
    const MAGIC: &[u8; 4] = b"DICM";

    let mut file = std::fs::File::open(p)?;
    let size = file.metadata()?.len();
    if size < PREAMBLE_LEN + MAGIC.len() as u64 {
        Ok(false)
    } else {
        let mut marker = [0u8; 4];
        file.seek(SeekFrom::Start(PREAMBLE_LEN))?;
        file.read_exact(&mut marker)?;
        Ok(&marker == MAGIC)
    }
}
104
105fn parse_instance(path: &Path) -> Result<Instance, ReadError> {
106 let obj = OpenFileOptions::new()
107 .read_until(tags::PIXEL_DATA)
108 .open_file(path)?;
109
110 let take_str = |tag| {
111 obj.element(tag)
112 .ok()
113 .and_then(|e| e.to_str().ok())
114 .map(|s| s.trim_end_matches('\0').trim().to_string())
115 .unwrap_or_default()
116 };
117 let take_opt_str = |tag| {
118 obj.element(tag)
119 .ok()
120 .and_then(|e| e.to_str().ok())
121 .map(|s| s.trim_end_matches('\0').trim().to_string())
122 };
123 let take_f64 = |tag| obj.element(tag).ok().and_then(|e| e.to_float64().ok());
124 let take_int = |tag, default: i32| {
125 obj.element(tag)
126 .ok()
127 .and_then(|e| e.to_int::<i32>().ok())
128 .unwrap_or(default)
129 };
130 let take_u16 = |tag, default: u16| {
131 obj.element(tag)
132 .ok()
133 .and_then(|e| e.to_int::<u16>().ok())
134 .unwrap_or(default)
135 };
136
137 let pixel_spacing = obj
138 .element(tags::PIXEL_SPACING)
139 .ok()
140 .and_then(|e| e.to_multi_float64().ok())
141 .and_then(|v| {
142 if v.len() >= 2 {
143 Some((v[0], v[1]))
144 } else {
145 None
146 }
147 });
148
149 let image_position_z = obj
150 .element(tags::IMAGE_POSITION_PATIENT)
151 .ok()
152 .and_then(|e| e.to_multi_float64().ok())
153 .and_then(|v| v.get(2).copied());
154
155 Ok(Instance {
156 path: path.to_path_buf(),
157 sop_instance_uid: take_str(tags::SOP_INSTANCE_UID),
158 instance_number: take_int(tags::INSTANCE_NUMBER, 0),
159 rows: take_u16(tags::ROWS, 0),
160 cols: take_u16(tags::COLUMNS, 0),
161 modality: take_str(tags::MODALITY),
162 photometric: take_str(tags::PHOTOMETRIC_INTERPRETATION),
163 window_center: take_f64(tags::WINDOW_CENTER),
164 window_width: take_f64(tags::WINDOW_WIDTH),
165 rescale_slope: take_f64(tags::RESCALE_SLOPE).unwrap_or(1.0),
166 rescale_intercept: take_f64(tags::RESCALE_INTERCEPT).unwrap_or(0.0),
167 pixel_spacing,
168 view_position: take_opt_str(tags::VIEW_POSITION),
169 image_laterality: take_opt_str(tags::IMAGE_LATERALITY),
170 image_position_z,
171 })
172}
173
174fn group_into_studies(instances: Vec<Instance>) -> Vec<Study> {
175 let mut by_study: BTreeMap<String, BTreeMap<String, Vec<Instance>>> = BTreeMap::new();
177 let mut study_meta: BTreeMap<String, StudyMeta> = BTreeMap::new();
178 let mut series_meta: BTreeMap<(String, String), SeriesMeta> = BTreeMap::new();
179
180 for inst in instances {
181 let study_uid = read_meta_tag(&inst.path, tags::STUDY_INSTANCE_UID)
182 .unwrap_or_else(|| "(unknown study)".into());
183 let series_uid = read_meta_tag(&inst.path, tags::SERIES_INSTANCE_UID)
184 .unwrap_or_else(|| "(unknown series)".into());
185
186 study_meta
187 .entry(study_uid.clone())
188 .or_insert_with(|| StudyMeta::read(&inst.path));
189 series_meta
190 .entry((study_uid.clone(), series_uid.clone()))
191 .or_insert_with(|| SeriesMeta::read(&inst.path));
192
193 by_study
194 .entry(study_uid)
195 .or_default()
196 .entry(series_uid)
197 .or_default()
198 .push(inst);
199 }
200
201 let mut studies: Vec<Study> = Vec::with_capacity(by_study.len());
202 for (study_uid, series_map) in by_study {
203 let meta = study_meta.remove(&study_uid).unwrap_or_default();
204 let mut series_vec: Vec<Series> = Vec::with_capacity(series_map.len());
205 let mut modalities = Vec::<String>::new();
206 for (series_uid, mut insts) in series_map {
207 sort_slices(&mut insts);
208 let smeta = series_meta
209 .remove(&(study_uid.clone(), series_uid.clone()))
210 .unwrap_or_default();
211 if !smeta.modality.is_empty() && !modalities.contains(&smeta.modality) {
212 modalities.push(smeta.modality.clone());
213 }
214 series_vec.push(Series {
215 series_instance_uid: series_uid,
216 series_number: smeta.series_number,
217 modality: smeta.modality,
218 description: smeta.description,
219 instances: insts,
220 });
221 }
222 series_vec.sort_by_key(|s| s.series_number);
223
224 studies.push(Study {
225 study_instance_uid: study_uid,
226 patient_name: meta.patient_name,
227 patient_id: meta.patient_id,
228 study_date: meta.study_date,
229 study_description: meta.study_description,
230 modalities,
231 series: series_vec,
232 });
233 }
234
235 studies.sort_by(|a, b| b.study_date.cmp(&a.study_date));
237
238 if studies.is_empty() {
239 warn!("no DICOM files parsed");
240 }
241 studies
242}
243
244fn sort_slices(insts: &mut [Instance]) {
249 let all_have_z = !insts.is_empty() && insts.iter().all(|i| i.image_position_z.is_some());
250 if all_have_z {
251 insts.sort_by(|a, b| {
252 let az = a.image_position_z.unwrap_or(0.0);
253 let bz = b.image_position_z.unwrap_or(0.0);
254 az.partial_cmp(&bz)
255 .unwrap_or(std::cmp::Ordering::Equal)
256 .then_with(|| a.instance_number.cmp(&b.instance_number))
257 });
258 } else {
259 insts.sort_by_key(|i| i.instance_number);
260 }
261}
262
263#[derive(Default)]
264struct StudyMeta {
265 patient_name: String,
266 patient_id: String,
267 study_date: String,
268 study_description: String,
269}
270
271impl StudyMeta {
272 fn read(path: &Path) -> Self {
273 let obj = match OpenFileOptions::new()
274 .read_until(tags::PIXEL_DATA)
275 .open_file(path)
276 {
277 Ok(o) => o,
278 Err(_) => return Self::default(),
279 };
280 let s = |tag| {
281 obj.element(tag)
282 .ok()
283 .and_then(|e| e.to_str().ok())
284 .map(|s| s.trim_end_matches('\0').trim().to_string())
285 .unwrap_or_default()
286 };
287 Self {
288 patient_name: s(tags::PATIENT_NAME),
289 patient_id: s(tags::PATIENT_ID),
290 study_date: s(tags::STUDY_DATE),
291 study_description: s(tags::STUDY_DESCRIPTION),
292 }
293 }
294}
295
296#[derive(Default)]
297struct SeriesMeta {
298 modality: String,
299 description: String,
300 series_number: i32,
301}
302
303impl SeriesMeta {
304 fn read(path: &Path) -> Self {
305 let obj = match OpenFileOptions::new()
306 .read_until(tags::PIXEL_DATA)
307 .open_file(path)
308 {
309 Ok(o) => o,
310 Err(_) => return Self::default(),
311 };
312 Self {
313 modality: obj
314 .element(tags::MODALITY)
315 .ok()
316 .and_then(|e| e.to_str().ok())
317 .map(|s| s.trim_end_matches('\0').trim().to_string())
318 .unwrap_or_default(),
319 description: obj
320 .element(tags::SERIES_DESCRIPTION)
321 .ok()
322 .and_then(|e| e.to_str().ok())
323 .map(|s| s.trim_end_matches('\0').trim().to_string())
324 .unwrap_or_default(),
325 series_number: obj
326 .element(tags::SERIES_NUMBER)
327 .ok()
328 .and_then(|e| e.to_int::<i32>().ok())
329 .unwrap_or(0),
330 }
331 }
332}
333
334fn read_meta_tag(path: &Path, tag: dicom::core::Tag) -> Option<String> {
335 let obj = OpenFileOptions::new()
336 .read_until(tags::PIXEL_DATA)
337 .open_file(path)
338 .ok()?;
339 obj.element(tag)
340 .ok()
341 .and_then(|e| e.to_str().ok())
342 .map(|s| s.trim_end_matches('\0').trim().to_string())
343}