//! dtx-parser (nom version) — find files generated by LaTeX .dtx / .ins source trees.
//!
//! Uses nom 8.x combinators for the structural parsing (brace groups, control
//! words, def-bodies) while keeping the macro-expansion logic imperative, since
//! TeX macro expansion is inherently stateful and context-sensitive — not a
//! good fit for pure parser combinators.

use std::collections::HashMap;
use std::path::{Path, PathBuf};
use walkdir::WalkDir;

use nom::{
    IResult, Parser,
    bytes::complete::{tag, take_till, take_while, take_while1},
    character::complete::char,
    combinator::recognize,
    sequence::preceded,
};

/// Error type used by every nom parser in this file: nom's plain
/// `(input, ErrorKind)` error over `&str` input.
type NomErr<'a> = nom::error::Error<&'a str>;
/// Parse-result shorthand: remaining `&str` input plus an output of type `O`,
/// or a [`NomErr`].
type PR<'a, O> = IResult<&'a str, O, NomErr<'a>>;

/// Read a file as a `String`.
///
/// The bytes are validated as UTF-8 exactly once; valid input is returned
/// without copying.  If validation fails (e.g. ISO-8859 encoded .dtx files)
/// the content is decoded with `String::from_utf8_lossy`, which replaces each
/// invalid sequence with U+FFFD and preserves everything else.
///
/// NOTE(review): the lossy fallback assumes invalid bytes only appear in
/// comments/author names, never in the macro names or filenames we parse —
/// confirm against the files this tool is run on.
///
/// # Errors
/// Returns the underlying I/O error when the file cannot be read.
pub fn read_file_contents(path: &Path) -> std::io::Result<String> {
    let text = match String::from_utf8(std::fs::read(path)?) {
        Ok(text) => text,
        // The error hands the original bytes back, so no second read is needed.
        Err(err) => String::from_utf8_lossy(err.as_bytes()).into_owned(),
    };
    Ok(text)
}
/// Collect all .dtx / .ins files reachable from `path`.
///
/// * A regular file is returned as a one-element vector when its extension is
///   `dtx` or `ins`; otherwise a warning is printed to stderr and the result
///   is empty.
/// * A directory is walked recursively (following symlinks); entries that
///   fail to be read are dropped, and only files with a matching extension
///   are kept.
/// * Anything else yields a warning on stderr and an empty vector.
fn collect_paths(path: &Path) -> Vec<PathBuf> {
    /// True when the path's extension is exactly `dtx` or `ins`.
    fn has_wanted_ext(p: &Path) -> bool {
        matches!(
            p.extension().and_then(|e| e.to_str()),
            Some("dtx" | "ins")
        )
    }

    if path.is_file() {
        if has_wanted_ext(path) {
            return vec![path.to_path_buf()];
        }
        eprintln!(
            "Warning: '{}' is not a .dtx or .ins file — skipping.",
            path.display()
        );
        return Vec::new();
    }

    if !path.is_dir() {
        eprintln!(
            "Warning: '{}' is neither a file nor a directory — skipping.",
            path.display()
        );
        return Vec::new();
    }

    WalkDir::new(path)
        .follow_links(true)
        .into_iter()
        .filter_map(Result::ok)
        .filter(|entry| entry.path().is_file() && has_wanted_ext(entry.path()))
        .map(|entry| entry.into_path())
        .collect()
}

// ─── Macro table ─────────────────────────────────────────────────────────────

/// One `\def`-defined macro as recorded by `handle_def`.
#[derive(Clone, Debug)]
struct MacroDef {
    /// Number of `#<digit>` parameter specs counted between the macro name
    /// and its body.
    param_count: usize,
    /// Raw text between the body's outer braces (unexpanded).
    body: String,
    /// Result of `body_may_generate` at definition time: whether expanding
    /// this macro could reach a `\generate` / `\generateFile`.
    may_generate: bool,
}

/// Macro name (without the leading `\`) → its most recent definition.
type MacroTable = HashMap<String, MacroDef>;

// ─── nom primitive parsers ────────────────────────────────────────────────────

/// Returns `true` for the characters accepted in a control-word name here:
/// the ASCII letters plus `@`.
fn is_tex_letter(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '@')
}

/// Parse a TeX control word name (one or more TeX letters, i.e. ASCII letters
/// or `@`), returning the name as `&str` together with the remaining input.
///
/// Must be called after the leading `\` has already been consumed.  Fails
/// (through `take_while1`) when the input does not start with a TeX letter.
fn tex_control_word<'a>(input: &'a str) -> PR<'a, &'a str> {
    take_while1(is_tex_letter)(input)
}

/// Consume one TeX comment: a `%` and everything up to, but not including,
/// the next newline (or the end of input).  Fails if the input does not
/// start with `%`.
fn tex_comment(input: &str) -> PR<'_, ()> {
    let (after_percent, _) = char('%')(input)?;
    take_till(|c| c == '\n')(after_percent).map(|(rest, _)| (rest, ()))
}

/// Skip any interleaving of ASCII whitespace and `%`-comments.
///
/// Never fails: when nothing can be skipped the input is returned unchanged.
fn ws(input: &str) -> PR<'_, ()> {
    let mut rest = input;
    loop {
        // Drop leading whitespace, then see whether a comment follows.
        rest = rest.trim_start_matches(|c: char| c.is_ascii_whitespace());
        let Ok((after_comment, ())) = tex_comment(rest) else {
            return Ok((rest, ()));
        };
        rest = after_comment;
    }
}

/// Parse a brace-group `{…}` and return its inner content as `&str`.
///
/// Handles nested braces, `%`-comments (so `%}` doesn't close the group) and
/// backslash escapes: the byte following a `\` is skipped, so `\{`, `\}` and
/// `\%` neither change the nesting depth nor start a comment.
///
/// Fails only when the input does not start with `{`.  An unterminated group
/// yields everything after the opening brace and leaves no remaining input.
fn brace_group<'a>(input: &'a str) -> PR<'a, &'a str> {
    // Brace depth is tracked with a manual byte scan; nom's `delimited`
    // cannot count nesting.
    let (input, _) = char::<&str, NomErr>('{')(input)?;
    let bytes = input.as_bytes();
    let mut depth = 1usize;
    let mut i = 0;
    while i < bytes.len() {
        match bytes[i] {
            // Escape: skip the backslash and the byte it protects.  If that
            // byte starts a multi-byte character, the continuation bytes fall
            // through the catch-all arm below.
            b'\\' => i += 2,
            b'%' => {
                // Comment: ignore everything up to the newline.
                i += 1;
                while i < bytes.len() && bytes[i] != b'\n' {
                    i += 1;
                }
            }
            b'{' => {
                depth += 1;
                i += 1;
            }
            b'}' => {
                depth -= 1;
                if depth == 0 {
                    // Slicing at an ASCII `}` is always a char boundary.
                    return Ok((&input[i + 1..], &input[..i]));
                }
                i += 1;
            }
            _ => i += 1, // Non-ASCII bytes never match \, {, }, %: safe
        }
    }
    // Unterminated group — return what we have
    Ok(("", input))
}

/// Skip leading whitespace/comments, then parse and discard a single
/// brace-group (depth counting only, no slice of the content is produced).
///
/// Uses the same scanning rules as `brace_group`: nested braces are counted,
/// `%`-comments are ignored up to the newline, and the byte following a `\`
/// is skipped so `\{`, `\}` and `\%` do not affect nesting.
///
/// Fails only when no `{` follows the skipped whitespace.  An unterminated
/// group consumes the rest of the input.
fn skip_brace_group(input: &str) -> PR<'_, ()> {
    let (input, _) = ws(input)?;
    let (input, _) = char::<&str, NomErr>('{')(input)?;
    let bytes = input.as_bytes();
    let mut depth = 1usize;
    let mut i = 0;
    while i < bytes.len() {
        match bytes[i] {
            // Escape: skip the backslash and the byte it protects.
            b'\\' => i += 2,
            b'%' => {
                i += 1;
                while i < bytes.len() && bytes[i] != b'\n' {
                    i += 1;
                }
            }
            b'{' => {
                depth += 1;
                i += 1;
            }
            b'}' => {
                depth -= 1;
                i += 1;
                if depth == 0 {
                    return Ok((&input[i..], ()));
                }
            }
            _ => i += 1,
        }
    }
    Ok(("", ()))
}

/// Skip whitespace/comments, then parse a brace group and return its inner
/// content.  Fails when no `{` follows the skipped whitespace.
fn ws_brace_group<'a>(input: &'a str) -> PR<'a, &'a str> {
    ws(input).and_then(|(rest, ())| brace_group(rest))
}

// ─── Control-word matching helper ────────────────────────────────────────────

/// Match `\name` as a complete control word (not a prefix of a longer name).
/// Consumes `\name` and returns the rest if the next char is not a TeX letter.
fn match_cmd<'a>(name: &'static str) -> impl Fn(&'a str) -> PR<'a, ()> {
    move |input: &'a str| {
        let (input, _) = char::<&'a str, NomErr<'a>>('\\')(input)?;
        let (input, _) = tag(name)(input)?;
        // Ensure it's a complete word
        if input.starts_with(is_tex_letter) {
            return Err(nom::Err::Error(nom::error::Error::new(
                input,
                nom::error::ErrorKind::Tag,
            )));
        }
        Ok((input, ()))
    }
}

// ─── Top-level parse ─────────────────────────────────────────────────────────

/// Scan `content` for a `\generate`, `\generateFile`, or
/// `\begin{filecontents` command that is **not** inside a `%`-comment.
/// Used as a fast pre-filter before full parsing.
///
/// A backslash followed by a non-letter is treated as a control symbol and
/// skipped as a unit, so an escaped `\%` does not start a comment and the
/// second backslash of `\\generate` is not mistaken for a command start.
fn has_generate_cmd(content: &str) -> bool {
    /// Byte-level equivalent of a TeX letter (ASCII alpha or `@`).
    fn is_letter(b: u8) -> bool {
        b.is_ascii_alphabetic() || b == b'@'
    }

    /// True when `rest` starts with `word` as a complete control-word name,
    /// i.e. the byte after it (if any) is not a TeX letter.
    fn starts_with_word(rest: &[u8], word: &[u8]) -> bool {
        match rest.strip_prefix(word) {
            Some(after) => !after.first().map_or(false, |&b| is_letter(b)),
            None => false,
        }
    }

    let bytes = content.as_bytes();
    let mut i = 0;
    while i < bytes.len() {
        match bytes[i] {
            b'%' => {
                // Skip to end of line
                i += 1;
                while i < bytes.len() && bytes[i] != b'\n' {
                    i += 1;
                }
            }
            b'\\' => {
                let rest = &bytes[i + 1..];
                if starts_with_word(rest, b"generate") || starts_with_word(rest, b"generateFile") {
                    return true;
                }
                // \begin{filecontents} or \begin{filecontents*}
                if let Some(after) = rest.strip_prefix(b"begin") {
                    // skip optional whitespace before the environment name
                    let arg_start = after
                        .iter()
                        .position(|b| !b.is_ascii_whitespace())
                        .unwrap_or(after.len());
                    if after[arg_start..].starts_with(b"{filecontents") {
                        return true;
                    }
                }
                // Control symbol (`\%`, `\\`, `\{` …): step over the escaped
                // byte as well.  Control words just step past the backslash;
                // their letters are skipped by the catch-all arm.
                i += match rest.first() {
                    Some(&b) if !is_letter(b) => 2,
                    _ => 1,
                };
            }
            _ => i += 1,
        }
    }
    false
}

/// Extract the names of all files that `content` (the text of the .dtx/.ins
/// file at `path`) would generate, in order of appearance.
///
/// `path` is only used to derive `\jobname` (its file stem, or `"jobname"`
/// when there is none / it is not valid UTF-8).  The scan recognises `\def`
/// (recorded in a local macro table), `\generateFile`, `\generate`,
/// `\begin{filecontents…}`, and calls to previously defined macros whose
/// body may lead to a generate command.
pub fn parse_file(path: &Path, content: &str) -> Vec<String> {
    // Fast pre-check: scan for \generate / \generateFile outside comments.
    // A plain contains("\\generate") would fire on comment lines like
    // "% use \generate to..." — this avoids that false positive.
    if !has_generate_cmd(content) {
        return Vec::new();
    }

    let jobname = path
        .file_stem()
        .and_then(|s| s.to_str())
        .unwrap_or("jobname")
        .to_owned();

    let mut macros = MacroTable::new();
    let mut targets: Vec<String> = Vec::with_capacity(16);

    let mut s = content;
    loop {
        // Jump to next backslash — everything else is prose
        s = advance_to_backslash(s);
        if s.is_empty() {
            break;
        }

        // Try each command we care about
        if let Ok((rest, ())) = match_cmd("def")(s) {
            s = handle_def(rest, &mut macros);
        } else if let Ok((rest, ())) = match_cmd("generateFile")(s) {
            s = handle_generate_file(rest, &macros, &jobname, &mut targets);
        } else if let Ok((rest, ())) = match_cmd("generate")(s) {
            s = handle_generate(rest, &macros, &jobname, &mut targets);
        } else if let Ok((rest, ())) = match_cmd("begin")(s) {
            s = handle_filecontents(rest, &macros, &jobname, &mut targets);
        } else if let Ok((rest, name)) = preceded(tag("\\"), tex_control_word).parse(s) {
            // Some other control word — possibly a user macro.  The table is
            // only read here, so the definition is borrowed, not cloned.
            s = match macros.get(name) {
                Some(def) if def.may_generate => {
                    let (args, after) = read_raw_args(rest, def.param_count);
                    let substituted = substitute_params(&def.body, &args);
                    scan_for_generates(&substituted, &macros, &jobname, &mut targets, 0);
                    after
                }
                Some(def) => skip_n_brace_groups(rest, def.param_count),
                None => rest,
            };
        } else {
            // Control symbol (`\%`, `\\`, `\{` …): skip the backslash AND the
            // character it escapes, so `\%` does not open a comment and the
            // second backslash of `\\generate` is not read as a command.
            let mut chars = s.chars();
            chars.next();
            chars.next();
            s = chars.as_str();
        }
    }

    targets
}

/// Return the suffix of `s` that starts at the first `\` not hidden inside a
/// `%`-comment, or `""` when there is none.
fn advance_to_backslash(s: &str) -> &str {
    let mut rest = s;
    loop {
        // Only `\` and `%` are interesting; everything else is skipped.
        let Some(hit) = rest.find(|c: char| c == '\\' || c == '%') else {
            return "";
        };
        if rest.as_bytes()[hit] == b'\\' {
            return &rest[hit..];
        }
        // Comment: resume scanning at the newline that ends it.
        let Some(eol) = rest[hit..].find('\n') else {
            return "";
        };
        rest = &rest[hit + eol..];
    }
}

// ─── Scan helper (re-entrant on expanded text) ────────────────────────────────

/// Maximum nesting of macro-body rescans before `scan_for_generates` gives up.
const MAX_SCAN_DEPTH: usize = 16;

/// Scan already-substituted macro text for generate commands, appending any
/// file names found to `targets`.
///
/// Recognises `\generateFile`, `\generate`, `\begin{filecontents…}` and calls
/// to macros in `macros` whose `may_generate` flag is set; the latter are
/// substituted and rescanned with `depth + 1`.  Returns without scanning once
/// `depth` exceeds [`MAX_SCAN_DEPTH`], which bounds self-referential macros.
fn scan_for_generates(
    text: &str,
    macros: &MacroTable,
    jobname: &str,
    targets: &mut Vec<String>,
    depth: usize,
) {
    if depth > MAX_SCAN_DEPTH {
        return;
    }
    let mut s = text;
    loop {
        s = advance_to_backslash(s);
        if s.is_empty() {
            break;
        }
        if let Ok((rest, ())) = match_cmd("generateFile")(s) {
            s = handle_generate_file(rest, macros, jobname, targets);
        } else if let Ok((rest, ())) = match_cmd("generate")(s) {
            s = handle_generate(rest, macros, jobname, targets);
        } else if let Ok((rest, ())) = match_cmd("begin")(s) {
            s = handle_filecontents(rest, macros, jobname, targets);
        } else if let Ok((rest, name)) = preceded(tag("\\"), tex_control_word).parse(s) {
            // The table is read-only here: borrow the definition.
            s = match macros.get(name) {
                Some(def) if def.may_generate => {
                    let (args, after) = read_raw_args(rest, def.param_count);
                    let substituted = substitute_params(&def.body, &args);
                    scan_for_generates(&substituted, macros, jobname, targets, depth + 1);
                    after
                }
                Some(def) => skip_n_brace_groups(rest, def.param_count),
                None => rest,
            };
        } else {
            // Control symbol (`\%`, `\\`, `\{` …): skip the backslash and the
            // character it escapes as one unit.
            let mut chars = s.chars();
            chars.next();
            chars.next();
            s = chars.as_str();
        }
    }
}

// ─── \def handler ────────────────────────────────────────────────────────────

/// Handle a `\def`; `s` is positioned right after the `\def` token.
///
/// Parses `\name`, counts the `#<digit>` parameter specs that follow, reads
/// the brace-delimited body and records the macro in `macros` (replacing any
/// earlier definition of the same name).  Returns the input remaining after
/// the body.  When the expected shape is not found, nothing is recorded and
/// the position reached so far is returned.
fn handle_def<'a>(s: &'a str, macros: &mut MacroTable) -> &'a str {
    // Parse: ws \ MacroName [#1 #2 … #N] { body }
    let Ok((s, _)) = ws(s) else { return s };
    let Ok((s, _)): PR<_> = char('\\')(s) else {
        return s;
    };
    let Ok((s, name)) = tex_control_word(s) else {
        return s;
    };

    let Ok((mut s, _)) = ws(s) else { return s };

    // Count parameter specs #1 … #N
    // (only the number of specs matters; the digits themselves are not checked)
    let mut param_count = 0usize;
    while let Ok((rest, _)) = recognize((
        char::<&str, NomErr<'_>>('#'),
        nom::character::complete::satisfy(|c| c.is_ascii_digit()),
    ))
    .parse(s)
    {
        param_count += 1;
        s = rest;
        let Ok((rest2, _)) = ws(s) else { break };
        s = rest2;
    }

    // Body
    match brace_group(s) {
        Ok((rest, body)) => {
            // Evaluated before the insert, so a reference to `name` inside
            // its own body sees the previous definition (if any).
            let may_generate = body_may_generate(body, macros);
            macros.insert(
                name.to_owned(),
                MacroDef {
                    param_count,
                    body: body.to_owned(),
                    may_generate,
                },
            );
            rest
        }
        // No `{` where the body should start (e.g. delimited parameter text).
        Err(_) => s,
    }
}

// ─── \begin{filecontents} handler ───────────────────────────────────────────

/// Skip the body of a filecontents environment: returns the input that
/// follows the first `\end{env_name}`, or `""` when no such terminator
/// exists (malformed input).
fn skip_filecontents_body<'a>(s: &'a str, env_name: &str) -> &'a str {
    let mut terminator = String::with_capacity(env_name.len() + 6);
    terminator.push_str("\\end{");
    terminator.push_str(env_name);
    terminator.push('}');

    s.split_once(terminator.as_str())
        .map_or("", |(_, after)| after)
}

/// Handle `\begin{filecontents}` and `\begin{filecontents*}`.
/// Syntax (LaTeX 2019+ also allows an optional `[options]` before `{filename}`):
///   \begin{filecontents}{filename}
///   \begin{filecontents*}{filename}
///   \begin{filecontents*}[options]{filename}
/// `s` is positioned right after the `\begin` token.
///
/// Pushes the expanded, trimmed filename onto `targets` (if non-empty) and
/// returns the input after the matching `\end{…}`, so the environment body is
/// never scanned for commands.  For any other environment the input after the
/// environment-name group is returned.  Malformed input (no name group, an
/// unterminated `[options]`, no filename group) returns the position reached
/// so far without recording anything.
fn handle_filecontents<'a>(
    s: &'a str,
    macros: &MacroTable,
    jobname: &str,
    targets: &mut Vec<String>,
) -> &'a str {
    // Expect {filecontents} or {filecontents*}
    let Ok((s, env_name)) = ws_brace_group(s) else {
        return s;
    };
    let env_name = env_name.trim();
    if env_name != "filecontents" && env_name != "filecontents*" {
        return s;
    }
    let Ok((s, _)) = ws(s) else { return s };
    // Skip optional [options] group (LaTeX 2019+)
    let s = if s.starts_with('[') {
        match s.find(']') {
            Some(close) => &s[close + 1..],
            // Unterminated `[`: bail out instead of slicing past the end.
            None => return s,
        }
    } else {
        s
    };
    // Next brace group is the filename
    let Ok((s, raw)) = ws_brace_group(s) else {
        return s;
    };
    let target = expand_text(raw, macros, jobname);
    let target = target.trim();
    if !target.is_empty() {
        targets.push(target.to_owned());
    }
    // Skip the environment body up to \end{filecontents} / \end{filecontents*}
    skip_filecontents_body(s, env_name)
}

// ─── \generateFile handler ───────────────────────────────────────────────────

/// Decide whether a macro body could lead to a generate command.
///
/// Returns `true` when `body`, outside `%`-comments, contains the control
/// word `\generate` or `\generateFile`, or a call to a macro already in
/// `macros` whose own `may_generate` flag is set.
fn body_may_generate(body: &str, macros: &MacroTable) -> bool {
    let bytes = body.as_bytes();
    let mut pos = 0;
    while let Some(&byte) = bytes.get(pos) {
        match byte {
            b'%' => {
                // Comment: jump to the newline that ends it.
                pos += 1;
                while pos < bytes.len() && bytes[pos] != b'\n' {
                    pos += 1;
                }
            }
            b'\\' => {
                let start = pos + 1;
                let Some(&first) = bytes.get(start) else {
                    break; // lone trailing backslash
                };
                if !is_tex_letter(first as char) {
                    // Control symbol: skip the escaped byte too.
                    pos = start + 1;
                    continue;
                }
                let len = bytes[start..]
                    .iter()
                    .take_while(|&&b| is_tex_letter(b as char))
                    .count();
                pos = start + len;
                let name = &body[start..pos];
                if matches!(name, "generate" | "generateFile")
                    || macros.get(name).is_some_and(|def| def.may_generate)
                {
                    return true;
                }
            }
            _ => pos += 1,
        }
    }
    false
}

/// Handle `\generateFile{target}{flag}{spec}`; `s` is right after
/// `\generateFile`.
///
/// Pushes the expanded, trimmed target onto `targets` (if non-empty), then
/// skips the flag and spec groups and returns what follows.  Returns `s`
/// unchanged when no target group is present.
fn handle_generate_file<'a>(
    s: &'a str,
    macros: &MacroTable,
    jobname: &str,
    targets: &mut Vec<String>,
) -> &'a str {
    let Ok((rest, raw)) = ws_brace_group(s) else {
        return s;
    };
    let expanded = expand_text(raw, macros, jobname);
    let target = expanded.trim();
    if !target.is_empty() {
        targets.push(target.to_owned());
    }
    // Discard flag + spec groups
    skip_n_brace_groups(rest, 2)
}

// ─── \generate handler ───────────────────────────────────────────────────────

/// Handle `\generate`; `s` is right after the `\generate` token.
///
/// Three shapes are recognised:
/// * `\generate\file{target}{spec}` — delegated to `handle_file_group`;
/// * `\generate{ \file{…}{…} … }` — the group's first real token is a
///   control sequence, so the group is scanned for `\file` calls;
/// * `\generate{target}{…}` — the group itself is the target and the
///   following group is skipped.
///
/// Returns the input after whatever was consumed, or `s` unchanged when
/// neither `\file` nor a brace group follows.
fn handle_generate<'a>(
    s: &'a str,
    macros: &MacroTable,
    jobname: &str,
    targets: &mut Vec<String>,
) -> &'a str {
    let Ok((s2, _)) = ws(s) else { return s };

    // \generate\file{target}{spec}
    if let Ok((rest, ())) = match_cmd("file")(s2) {
        return handle_file_group(rest, macros, jobname, targets);
    }

    // \generate{…}
    let Ok((rest_outer, inner)) = brace_group(s2) else {
        return s;
    };

    // Skip leading whitespace/comments to find the first real token
    if skip_ws_str(inner).starts_with('\\') {
        // Body form: scan for \file calls inside
        let mut cursor = inner;
        loop {
            cursor = advance_to_backslash(cursor);
            if cursor.is_empty() {
                break;
            }
            if let Ok((after, ())) = match_cmd("file")(cursor) {
                cursor = handle_file_group(after, macros, jobname, targets);
            } else if let Ok((after, _)) = preceded(tag("\\"), tex_control_word).parse(cursor) {
                // Skip any other command name
                cursor = after;
            } else {
                // Control symbol (`\%`, `\\` …): skip the backslash and the
                // character it escapes, so `\%` does not open a comment.
                let mut chars = cursor.chars();
                chars.next();
                chars.next();
                cursor = chars.as_str();
            }
        }
        return rest_outer;
    }

    // Bare \generate{target}
    let target = expand_text(inner, macros, jobname);
    let target = target.trim();
    if !target.is_empty() {
        targets.push(target.to_owned());
    }
    skip_n_brace_groups(rest_outer, 1)
}

/// Handle `\file{target}{spec}`; `s` is right after the `\file` token.
///
/// Pushes the expanded, trimmed target onto `targets` (if non-empty), then
/// reads the spec group and scans it for further `\file` calls, handling each
/// recursively.  Returns the input after the spec group, after the target
/// group when no spec follows, or `s` unchanged when no target group exists.
fn handle_file_group<'a>(
    s: &'a str,
    macros: &MacroTable,
    jobname: &str,
    targets: &mut Vec<String>,
) -> &'a str {
    let Ok((rest, raw)) = ws_brace_group(s) else {
        return s;
    };
    let target = expand_text(raw, macros, jobname);
    let target = target.trim();
    if !target.is_empty() {
        targets.push(target.to_owned());
    }

    // Read the spec group and scan it for nested \file calls
    // (some .ins files chain \file inside the spec via trailing %)
    let Ok((after_spec, spec)) = ws_brace_group(rest) else {
        return rest;
    };
    let mut inner = spec;
    loop {
        inner = advance_to_backslash(inner);
        if inner.is_empty() {
            break;
        }
        if let Ok((after_file, ())) = match_cmd("file")(inner) {
            inner = handle_file_group(after_file, macros, jobname, targets);
        } else if let Ok((after, _)) = preceded(tag("\\"), tex_control_word).parse(inner) {
            // skip this control word
            inner = after;
        } else {
            // Control symbol (`\%`, `\\` …): skip the backslash and the
            // character it escapes, so `\%` does not open a comment.
            let mut chars = inner.chars();
            chars.next();
            chars.next();
            inner = chars.as_str();
        }
    }
    after_spec
}

/// Plain-`&str` wrapper around `ws`: returns `s` with leading whitespace and
/// `%`-comments removed, or `s` itself should `ws` report an error.
fn skip_ws_str(s: &str) -> &str {
    match ws(s) {
        Ok((rest, ())) => rest,
        Err(_) => s,
    }
}

/// Skip up to `n` consecutive brace groups, discarding their content, and
/// return the input that follows.  Stops early at the first position where
/// no brace group can be skipped.
fn skip_n_brace_groups(mut s: &str, n: usize) -> &str {
    let mut remaining = n;
    while remaining > 0 {
        match skip_brace_group(s) {
            Ok((rest, ())) => s = rest,
            Err(_) => return s,
        }
        remaining -= 1;
    }
    s
}

// ─── Argument reading ─────────────────────────────────────────────────────────

/// Read `n` brace-group arguments from `s` without expanding them.
///
/// Always returns exactly `n` strings: an argument whose brace group is
/// missing is recorded as the empty string and the cursor stays put.  The
/// second tuple element is the input after the last group actually read.
fn read_raw_args(s: &str, n: usize) -> (Vec<String>, &str) {
    let mut cursor = s;
    let args: Vec<String> = (0..n)
        .map(|_| match ws_brace_group(cursor) {
            Ok((rest, raw)) => {
                cursor = rest;
                raw.to_owned()
            }
            Err(_) => String::new(),
        })
        .collect();
    (args, cursor)
}

/// Replace every `#1` … `#9` in `text` with the corresponding entry of `args`
/// (`#1` → `args[0]`).
///
/// A parameter whose argument is missing is removed.  A `#` that is not
/// followed by a digit 1–9 (including `#0` and a trailing `#`) is copied
/// through unchanged.
fn substitute_params(text: &str, args: &[String]) -> String {
    let mut out = String::with_capacity(text.len());
    let mut rest = text;
    while let Some(hash) = rest.find('#') {
        // Copy everything before the `#` verbatim.
        out.push_str(&rest[..hash]);
        let tail = &rest[hash + 1..];
        match tail.as_bytes().first() {
            Some(&digit) if digit.is_ascii_digit() && digit != b'0' => {
                if let Some(arg) = args.get(usize::from(digit - b'1')) {
                    out.push_str(arg);
                }
                // The digit is ASCII, so slicing one byte further is safe.
                rest = &tail[1..];
            }
            _ => {
                out.push('#');
                rest = tail;
            }
        }
    }
    out.push_str(rest);
    out
}

// ─── Text expansion ───────────────────────────────────────────────────────────

use std::borrow::Cow;

/// Maximum macro-expansion nesting before `expand_text_depth` returns its
/// input unexpanded.
const MAX_EXPAND_DEPTH: usize = 16;

/// Expand `\jobname` and known macros in `text`, stripping `%`-comments.
///
/// Text containing neither a backslash nor a `%` needs no processing and is
/// returned borrowed; anything else goes through `expand_text_depth`, so
/// comments are removed whether or not the text also contains a macro.
fn expand_text<'a>(text: &'a str, macros: &MacroTable, jobname: &str) -> Cow<'a, str> {
    if !text.bytes().any(|b| matches!(b, b'\\' | b'%')) {
        return Cow::Borrowed(text);
    }
    Cow::Owned(expand_text_depth(text, macros, jobname, 0))
}

/// Expand `text` at nesting level `depth`.
///
/// * `%` up to (not including) the next newline is dropped.
/// * `\jobname` is replaced by `jobname`.
/// * A control word found in `macros` is replaced by its body, with `#n`
///   parameters substituted from the following brace groups, and the result
///   is expanded again at `depth + 1`.
/// * Unknown control words and control symbols (`\` + non-letter) are copied
///   through unchanged.
///
/// Once `depth` exceeds [`MAX_EXPAND_DEPTH`] the text is returned as is, which
/// bounds self-referential definitions.
fn expand_text_depth(text: &str, macros: &MacroTable, jobname: &str, depth: usize) -> String {
    if depth > MAX_EXPAND_DEPTH {
        return text.to_owned();
    }

    let mut result = String::with_capacity(text.len());
    let bytes = text.as_bytes();
    let mut i = 0;

    while i < bytes.len() {
        match bytes[i] {
            b'%' => {
                // Comment: drop everything up to the newline.
                while i < bytes.len() && bytes[i] != b'\n' {
                    i += 1;
                }
                continue;
            }
            b'\\' => {}
            _ => {
                // Copy a whole run of ordinary text at once.  The run ends at
                // an ASCII byte or at the end, so the slice is well-formed.
                let start = i;
                while i < bytes.len() && !matches!(bytes[i], b'%' | b'\\') {
                    i += 1;
                }
                result.push_str(&text[start..i]);
                continue;
            }
        }

        i += 1; // skip backslash
        let Some(next) = text[i..].chars().next() else {
            // Lone trailing backslash.
            result.push('\\');
            break;
        };

        if !is_tex_letter(next) {
            // Control symbol: keep it verbatim.
            result.push('\\');
            result.push(next);
            i += next.len_utf8();
            continue;
        }

        // Control word
        let start = i;
        while i < bytes.len() && is_tex_letter(bytes[i] as char) {
            i += 1;
        }
        let name = &text[start..i];

        if name == "jobname" {
            result.push_str(jobname);
            continue;
        }
        let Some(def) = macros.get(name) else {
            result.push('\\');
            result.push_str(name);
            continue;
        };

        if def.param_count == 0 {
            result.push_str(&expand_text_depth(&def.body, macros, jobname, depth + 1));
        } else {
            let (args, after_args) =
                read_args_from_str(&text[i..], def.param_count, macros, jobname, depth + 1);
            // `after_args` is a suffix of `text`; recover the byte offset.
            i = text.len() - after_args.len();
            let substituted = substitute_params(&def.body, &args);
            result.push_str(&expand_text_depth(&substituted, macros, jobname, depth + 1));
        }
    }

    result
}

/// Read `n` brace-group arguments from `s`, expanding each one at nesting
/// level `depth`.
///
/// The depth is threaded through (rather than restarting at zero) so that a
/// macro which re-invokes itself inside its own argument cannot recurse
/// without bound.  Always returns exactly `n` strings; a missing group is
/// recorded as the empty string.  The second tuple element is the input after
/// the last group actually read.
fn read_args_from_str<'a>(
    s: &'a str,
    n: usize,
    macros: &MacroTable,
    jobname: &str,
    depth: usize,
) -> (Vec<String>, &'a str) {
    let mut args = Vec::with_capacity(n);
    let mut s = s;
    for _ in 0..n {
        match ws_brace_group(s) {
            Ok((rest, raw)) => {
                args.push(expand_text_depth(raw, macros, jobname, depth));
                s = rest;
            }
            Err(_) => args.push(String::new()),
        }
    }
    (args, s)
}

// ─── Tests ───────────────────────────────────────────────────────────────────

// End-to-end tests: each one feeds a small .dtx/.ins snippet to `parse_file`
// and checks the list of generated file names.
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// Run `parse_file` on `src` as if it were the content of `filename`
    /// (the file stem of `filename` becomes `\jobname`).
    fn parse(filename: &str, src: &str) -> Vec<String> {
        parse_file(Path::new(filename), src)
    }

    #[test]
    fn test_generate_file_basic() {
        let src = r#"\generateFile{bondcolor.drv}{t}{\from{bondcolor.dtx}{driver}}"#;
        assert_eq!(parse("bondcolor.dtx", src), vec!["bondcolor.drv"]);
    }

    #[test]
    fn test_jobname_in_generate_file() {
        let src = r#"\generateFile{\jobname.sty}{t}{\from{\jobname.dtx}{package}}"#;
        assert_eq!(parse("mybundle.dtx", src), vec!["mybundle.sty"]);
    }

    #[test]
    fn test_def_zero_arg() {
        let src = r#"
\def\myfile{coolpackage}
\generateFile{\myfile.sty}{t}{\from{\myfile.dtx}{package}}
"#;
        assert_eq!(parse("pkg.dtx", src), vec!["coolpackage.sty"]);
    }

    #[test]
    fn test_generate_backslash_file() {
        let src = r#"\generate\file{output.tex}{\from{source.dtx}{body}}"#;
        assert_eq!(parse("source.dtx", src), vec!["output.tex"]);
    }

    #[test]
    fn test_generate_body_with_file() {
        let src =
            r#"\generate{\file{a.sty}{\from{pkg.dtx}{style}}\file{b.drv}{\from{pkg.dtx}{driver}}}"#;
        assert_eq!(parse("pkg.dtx", src), vec!["a.sty", "b.drv"]);
    }

    #[test]
    fn test_multiple_generate_file() {
        let src = r#"
\generateFile{a.sty}{t}{\from{pkg.dtx}{style}}
\generateFile{b.drv}{t}{\from{pkg.dtx}{driver}}
"#;
        assert_eq!(parse("pkg.dtx", src), vec!["a.sty", "b.drv"]);
    }

    #[test]
    fn test_comment_between_command_and_brace() {
        let src = "\\generateFile% a comment\n{result.tex}{t}{\\from{x.dtx}{}}";
        assert_eq!(parse("x.dtx", src), vec!["result.tex"]);
    }

    #[test]
    fn test_parameterised_macro_extract() {
        let src = r#"
\def\Extract#1#2#3{\generate{\file{#1.#2}{\from{\jobname.dtx}{#3}}}}

\Extract{\jobname}{sty}{package}
\Extract{README}{md}{readme}
\Extract{etc}{uue}{etc}
\Extract{make}{sh}{make}
"#;
        let out = parse("mybundle.dtx", src);
        assert_eq!(out, vec!["mybundle.sty", "README.md", "etc.uue", "make.sh"]);
    }

    #[test]
    fn test_second_def_shadows_first() {
        // The later, empty definition replaces the generating one.
        let src = r#"
\def\Extract#1#2#3{\generate{\file{#1.#2}{\from{\jobname.dtx}{#3}}}}
\def\Extract#1#2#3{}

\Extract{\jobname}{sty}{package}
\Extract{README}{md}{readme}
"#;
        let out = parse("mybundle.dtx", src);
        assert!(out.is_empty(), "expected empty, got {out:?}");
    }

    #[test]
    fn test_jobname_in_macro_arg() {
        let src = r#"
\def\MkFile#1#2{\generateFile{#1.#2}{t}{\from{\jobname.dtx}{#2}}}
\MkFile{\jobname}{sty}
\MkFile{extra}{ins}
"#;
        let out = parse("mypackage.dtx", src);
        assert_eq!(out, vec!["mypackage.sty", "extra.ins"]);
    }

    #[test]
    fn test_zero_arg_macro_with_generate() {
        let src = r#"
\def\DoGenerate{\generateFile{auto.sty}{t}{\from{auto.dtx}{style}}}
\DoGenerate
"#;
        let out = parse("auto.dtx", src);
        assert_eq!(out, vec!["auto.sty"]);
    }

    #[test]
    fn test_nwejm_pattern() {
        // Macro names containing `@`, a comment right after `\generate{`,
        // and unrelated commands between the `\file` calls.
        let src = r#"
\def\NWEJM@classname{\jobname}
\def\NWEJM@addons{addons}
\def\NWEJM@examplestemplate{\jobname-examples-template}
%
\usedir{tex/latex/\NWEJM@classname}
\generate{%
  \file{\NWEJM@classname.cls}{\from{\jobname.dtx}{class}}
  \file{\NWEJM@classname art.cls}{\from{\jobname.dtx}{class-article}}
  \file{\NWEJM@classname.dbx}{\from{\jobname.dtx}{datamodel}}
  \file{\NWEJM@classname.cbx}{\from{\jobname.dtx}{citestyle}}
  \file{\NWEJM@classname.bbx}{\from{\jobname.dtx}{bibstyle}}
  \file{\NWEJM@classname.lbx}{\from{\jobname.dtx}{languagemodel}}
  \nopreamble\nopostamble
  \file{\NWEJM@classname.cfg}{\from{\jobname.dtx}{configuration}}
  \file{\NWEJM@classname-english.trsl}{\from{\jobname.dtx}{english}}
  \file{\NWEJM@classname-french.trsl}{\from{\jobname.dtx}{french}}
  \file{\NWEJM@classname-german.trsl}{\from{\jobname.dtx}{german}}
  \file{\NWEJM@classname-dutch.trsl}{\from{\jobname.dtx}{dutch}}
}%
"#;
        let out = parse("NWEJM.dtx", src);
        assert_eq!(
            out,
            vec![
                "NWEJM.cls",
                "NWEJM art.cls",
                "NWEJM.dbx",
                "NWEJM.cbx",
                "NWEJM.bbx",
                "NWEJM.lbx",
                "NWEJM.cfg",
                "NWEJM-english.trsl",
                "NWEJM-french.trsl",
                "NWEJM-german.trsl",
                "NWEJM-dutch.trsl",
            ]
        );
    }

    #[test]
    fn test_file_not_matched_as_prefix_of_longer_name() {
        // `\filecontents` must not be mistaken for `\file`.
        let src = r#"\generate{\filecontents{some.tex}{body}\file{real.sty}{\from{x.dtx}{s}}}"#;
        let out = parse("x.dtx", src);
        assert_eq!(out, vec!["real.sty"]);
    }

    #[test]
    fn test_unicode_in_comment() {
        let src = "% Ma\u{00EF}eul Rouquette \u{2014} ma\u{00EF}eul dot net\n\\generateFile{pkg.sty}{t}{\\from{pkg.dtx}{package}}\n";
        let out = parse("pkg.dtx", src);
        assert_eq!(out, vec!["pkg.sty"]);
    }

    #[test]
    fn test_unicode_inside_brace_group() {
        let src =
            "\\generateFile{r\u{00E9}sum\u{00E9}.sty}{t}{\\from{r\u{00E9}sum\u{00E9}.dtx}{pkg}}";
        let out = parse("r\u{00E9}sum\u{00E9}.dtx", src);
        assert_eq!(out, vec!["r\u{00E9}sum\u{00E9}.sty"]);
    }
    #[test]
    fn test_filecontents_star() {
        let src = "\\begin{filecontents*}{\\jobname.bib}\n@article{x}{}\n\\end{filecontents*}";
        let out = parse("mypaper.dtx", src);
        assert_eq!(out, vec!["mypaper.bib"]);
    }

    #[test]
    fn test_filecontents_no_star() {
        let src = "\\begin{filecontents}{readme.txt}\nSome content\n\\end{filecontents}";
        let out = parse("pkg.dtx", src);
        assert_eq!(out, vec!["readme.txt"]);
    }

    #[test]
    fn test_filecontents_with_options() {
        // LaTeX 2019+ optional [options] before filename
        let src = "\\begin{filecontents*}[overwrite]{data.csv}\na,b,c\n\\end{filecontents*}";
        let out = parse("pkg.dtx", src);
        assert_eq!(out, vec!["data.csv"]);
    }

    #[test]
    fn test_filecontents_in_comment_not_matched() {
        // \begin{filecontents} in a comment must not produce a result
        let src =
            "% \\begin{filecontents}{ignored.txt}\n\\generateFile{real.sty}{t}{\\from{x.dtx}{s}}";
        let out = parse("x.dtx", src);
        assert_eq!(out, vec!["real.sty"]);
    }

    #[test]
    fn test_file_chained_in_spec_via_comment() {
        // Real-world pattern: \file spec is left unclosed by trailing %,
        // chaining further \file calls inside it.
        let src = r#"\generate{%
  \usepreamble\package\file{a.cls}{\from{pkg.dtx}{cls}%
  \nopreamble\nopostamble\file{build.sh}{\from{pkg.dtx}{build}}%
  \nopreamble\nopostamble\file{clean.sh}{\from{pkg.dtx}{clean}}%
  \file{b.sty}{\from{pkg.dtx}{sty}}
}"#;
        let out = parse("pkg.dtx", src);
        assert_eq!(out, vec!["a.cls", "build.sh", "clean.sh", "b.sty"]);
    }

    #[test]
    fn test_filecontents_body_not_parsed() {
        // Commands inside the filecontents body must not be treated as
        // real \generate statements.
        let src = r#"
\begin{filecontents*}{mymacros.sty}
\def\Extract#1#2#3{\generate{\file{#1.#2}{\from{\jobname.dtx}{#3}}}}
\Extract{fake}{tex}{fake}
\end{filecontents*}
\generateFile{real.sty}{t}{\from{pkg.dtx}{package}}
"#;
        let out = parse("pkg.dtx", src);
        assert_eq!(out, vec!["mymacros.sty", "real.sty"]);
    }

    #[test]
    fn test_filecontents_no_star_body_skipped() {
        // Same as above for the unstarred environment.
        let src = r#"
\begin{filecontents}{helper.tex}
\generateFile{fake.sty}{t}{\from{x.dtx}{s}}
\end{filecontents}
\generateFile{real.sty}{t}{\from{pkg.dtx}{s}}
"#;
        let out = parse("pkg.dtx", src);
        assert_eq!(out, vec!["helper.tex", "real.sty"]);
    }
}
