Skip to main content

ytil_agents/agent/session_parser/
claude.rs

1use std::path::PathBuf;
2
3use chrono::DateTime;
4use chrono::Utc;
5use rootcause::option_ext::OptionExt;
6use rootcause::prelude::ResultExt;
7use serde::Deserialize;
8use serde::de::IgnoredAny;
9
10use crate::agent::Agent;
11use crate::agent::session::SearchTextBuilder;
12use crate::agent::session::Session;
13
14/// Parse one Claude JSONL session file.
15///
16/// # Errors
17/// Returns an error when the JSONL cannot be parsed or required session metadata is missing.
18pub fn parse(content: &str) -> rootcause::Result<ClaudeSession> {
19    let mut session_id = None;
20    let mut workspace_dir = None;
21    let mut created_at = None;
22    let mut updated_at = None;
23    let mut first_user_message = None;
24    let mut search_text = SearchTextBuilder::default();
25
26    for (line_idx, line) in content.lines().enumerate() {
27        let line = serde_json::from_str::<ClaudeSessionLine>(line)
28            .context("failed to parse Claude session json line".to_owned())
29            .attach(format!("line_number={}", line_idx.saturating_add(1)))
30            .attach(format!("line={line}"))?;
31
32        if let Some(timestamp) = line.timestamp() {
33            updated_at = Some(timestamp);
34        }
35
36        if let Some(meta) = line.session_meta() {
37            session_id.get_or_insert_with(|| meta.session_id.to_owned());
38            workspace_dir.get_or_insert_with(|| PathBuf::from(meta.cwd));
39            created_at.get_or_insert(meta.timestamp);
40        }
41
42        if let Some(user_message) = line.user_search_text() {
43            if first_user_message.is_none() {
44                first_user_message = Some(user_message.clone());
45            }
46            search_text.push(&user_message);
47        }
48        if let Some(assistant_message) = line.assistant_search_text() {
49            search_text.push(&assistant_message);
50        }
51    }
52
53    let session_id = session_id.context("no Claude session record found".to_owned())?;
54    let workspace_dir = workspace_dir.context("no Claude session record found".to_owned())?;
55    let created_at = created_at.context("no Claude session record found".to_owned())?;
56
57    let name = first_user_message.unwrap_or_else(|| {
58        workspace_dir
59            .file_name()
60            .and_then(|name| name.to_str())
61            .filter(|name| !name.is_empty())
62            .map_or_else(|| session_id.clone(), str::to_owned)
63    });
64    let search_text = search_text.build(&name);
65
66    Ok(ClaudeSession {
67        id: session_id,
68        name,
69        search_text,
70        workspace: workspace_dir,
71        created_at,
72        updated_at: updated_at.unwrap_or(created_at),
73    })
74}
75
76#[derive(Clone, Debug, Eq, PartialEq)]
77pub struct ClaudeSession {
78    pub id: String,
79    pub name: String,
80    pub search_text: String,
81    pub workspace: PathBuf,
82    pub created_at: DateTime<Utc>,
83    pub updated_at: DateTime<Utc>,
84}
85
86impl ClaudeSession {
87    pub fn into_session(self, path: PathBuf) -> Session {
88        let mut session = Session::new(Agent::Claude, self.id, self.workspace, path, None, self.created_at);
89        session.name = self.name;
90        session.search_text = self.search_text;
91        session.updated_at = self.updated_at;
92        session
93    }
94}
95
96#[derive(Debug, Deserialize)]
97#[serde(tag = "type")]
98enum ClaudeSessionLine {
99    #[serde(rename = "user")]
100    User(ClaudeUserLine),
101    #[serde(rename = "assistant")]
102    Assistant(ClaudeAssistantLine),
103    #[serde(rename = "progress")]
104    #[serde(alias = "system")]
105    #[serde(alias = "attachment")]
106    Metadata(ClaudeMetadataLine),
107    #[serde(rename = "queue-operation")]
108    TimestampOnly(ClaudeTimestampedLine),
109    #[serde(other)]
110    Other,
111}
112
113impl ClaudeSessionLine {
114    const fn timestamp(&self) -> Option<DateTime<Utc>> {
115        match self {
116            Self::User(line) => Some(line.timestamp),
117            Self::Assistant(line) => Some(line.timestamp),
118            Self::Metadata(line) => Some(line.timestamp),
119            Self::TimestampOnly(line) => Some(line.timestamp),
120            Self::Other => None,
121        }
122    }
123
124    fn session_meta(&self) -> Option<ClaudeSessionMeta<'_>> {
125        match self {
126            Self::User(line) => Some(ClaudeSessionMeta::from(line)),
127            Self::Assistant(line) => Some(ClaudeSessionMeta::from(line)),
128            Self::Metadata(line) => Some(ClaudeSessionMeta::from(line)),
129            Self::TimestampOnly(_) | Self::Other => None,
130        }
131    }
132
133    fn user_search_text(&self) -> Option<String> {
134        match self {
135            Self::User(line) if !line.is_meta => line.message.content.search_text(),
136            Self::User(_) | Self::Assistant(_) | Self::Metadata(_) | Self::TimestampOnly(_) | Self::Other => None,
137        }
138    }
139
140    fn assistant_search_text(&self) -> Option<String> {
141        match self {
142            Self::Assistant(line) => line.message.search_text(),
143            Self::User(_) | Self::Metadata(_) | Self::TimestampOnly(_) | Self::Other => None,
144        }
145    }
146}
147
148struct ClaudeSessionMeta<'a> {
149    session_id: &'a str,
150    cwd: &'a str,
151    timestamp: DateTime<Utc>,
152}
153
154#[derive(Debug, Deserialize)]
155struct ClaudeUserLine {
156    #[serde(rename = "sessionId")]
157    session_id: String,
158    cwd: String,
159    timestamp: DateTime<Utc>,
160    #[serde(default, rename = "isMeta")]
161    is_meta: bool,
162    message: ClaudeUserMessage,
163}
164
165#[derive(Debug, Deserialize)]
166struct ClaudeAssistantLine {
167    #[serde(rename = "sessionId")]
168    session_id: String,
169    cwd: String,
170    timestamp: DateTime<Utc>,
171    message: ClaudeAssistantMessage,
172}
173
174#[derive(Debug, Deserialize)]
175struct ClaudeMetadataLine {
176    #[serde(rename = "sessionId")]
177    session_id: String,
178    cwd: String,
179    timestamp: DateTime<Utc>,
180}
181
182impl<'a> From<&'a ClaudeUserLine> for ClaudeSessionMeta<'a> {
183    fn from(value: &'a ClaudeUserLine) -> Self {
184        Self {
185            session_id: &value.session_id,
186            cwd: &value.cwd,
187            timestamp: value.timestamp,
188        }
189    }
190}
191
192impl<'a> From<&'a ClaudeAssistantLine> for ClaudeSessionMeta<'a> {
193    fn from(value: &'a ClaudeAssistantLine) -> Self {
194        Self {
195            session_id: &value.session_id,
196            cwd: &value.cwd,
197            timestamp: value.timestamp,
198        }
199    }
200}
201
202impl<'a> From<&'a ClaudeMetadataLine> for ClaudeSessionMeta<'a> {
203    fn from(value: &'a ClaudeMetadataLine) -> Self {
204        Self {
205            session_id: &value.session_id,
206            cwd: &value.cwd,
207            timestamp: value.timestamp,
208        }
209    }
210}
211
212#[derive(Debug, Deserialize)]
213struct ClaudeTimestampedLine {
214    timestamp: DateTime<Utc>,
215}
216
217#[derive(Debug, Deserialize)]
218struct ClaudeUserMessage {
219    content: ClaudeUserContent,
220}
221
222#[derive(Debug, Deserialize)]
223struct ClaudeAssistantMessage {
224    #[serde(default)]
225    content: Vec<ClaudeAssistantContentPart>,
226}
227
228impl ClaudeAssistantMessage {
229    fn search_text(&self) -> Option<String> {
230        let mut search_text = SearchTextBuilder::default();
231        for snippet in self
232            .content
233            .iter()
234            .filter_map(ClaudeAssistantContentPart::assistant_search_text)
235        {
236            search_text.push(snippet);
237        }
238        let search_text = search_text.build("");
239        (!search_text.is_empty()).then_some(search_text)
240    }
241}
242
243#[cfg_attr(test, derive(PartialEq))]
244#[derive(Debug, Deserialize)]
245#[serde(untagged)]
246enum ClaudeUserContent {
247    Text(ClaudeUserText),
248    Parts(Vec<ClaudeUserContentPart>),
249}
250
251impl ClaudeUserContent {
252    fn search_text(&self) -> Option<String> {
253        match self {
254            Self::Text(text) => text.preview(),
255            Self::Parts(items) => {
256                let mut search_text = SearchTextBuilder::default();
257                for snippet in items.iter().filter_map(|item| match item {
258                    ClaudeUserContentPart::Text { text } => text.preview(),
259                    ClaudeUserContentPart::ToolResult { .. } | ClaudeUserContentPart::Other => None,
260                }) {
261                    search_text.push(&snippet);
262                }
263                let search_text = search_text.build("");
264                (!search_text.is_empty()).then_some(search_text)
265            }
266        }
267    }
268}
269
270#[cfg_attr(test, derive(PartialEq))]
271#[derive(Debug, Deserialize)]
272#[serde(tag = "type", rename_all = "snake_case")]
273enum ClaudeUserContentPart {
274    Text {
275        text: ClaudeUserText,
276    },
277    ToolResult {
278        #[serde(rename = "content")]
279        _content: IgnoredAny,
280    },
281    #[serde(other)]
282    Other,
283}
284
285#[derive(Debug, Deserialize)]
286#[serde(tag = "type", rename_all = "snake_case")]
287enum ClaudeAssistantContentPart {
288    Text {
289        text: String,
290    },
291    #[serde(other)]
292    Other,
293}
294
295impl ClaudeAssistantContentPart {
296    fn assistant_search_text(&self) -> Option<&str> {
297        match self {
298            Self::Text { text } => Some(text),
299            Self::Other => None,
300        }
301    }
302}
303
304#[derive(Clone, Debug, Eq, PartialEq)]
305enum ClaudeUserText {
306    Plain(String),
307    Cmd(ClaudeCmdInvocation),
308}
309
310impl ClaudeUserText {
311    fn preview(&self) -> Option<String> {
312        match self {
313            Self::Plain(text)
314                if matches!(
315                    text.trim_start(),
316                    text if text.starts_with("<local-command-caveat>")
317                        || text.starts_with("<local-command-stdout>")
318                ) =>
319            {
320                None
321            }
322            Self::Plain(text) => Some(text.clone()),
323            Self::Cmd(command) => Some(command.preview()),
324        }
325    }
326}
327
328impl<'de> Deserialize<'de> for ClaudeUserText {
329    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
330    where
331        D: serde::Deserializer<'de>,
332    {
333        let text = String::deserialize(deserializer)?;
334        Ok(self::ClaudeCmdInvocation::parse(&text)
335            .map(Self::Cmd)
336            .unwrap_or(Self::Plain(text)))
337    }
338}
339
/// The two wrapper tags recognised inside a command-style user message.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ClaudeCommandTag {
    Name,
    Args,
}

impl ClaudeCommandTag {
    /// Opening and closing delimiter of the tag, in that order.
    const fn delimiters(self) -> (&'static str, &'static str) {
        match self {
            Self::Name => ("<command-name>", "</command-name>"),
            Self::Args => ("<command-args>", "</command-args>"),
        }
    }

    /// Opening delimiter, e.g. `<command-name>`.
    const fn open(self) -> &'static str {
        self.delimiters().0
    }

    /// Closing delimiter, e.g. `</command-name>`.
    const fn close(self) -> &'static str {
        self.delimiters().1
    }
}
361
362#[derive(Clone, Debug, Eq, PartialEq)]
363struct ClaudeCmdInvocation {
364    name: String,
365    args: Option<String>,
366}
367
368impl ClaudeCmdInvocation {
369    fn parse(text: &str) -> Option<Self> {
370        fn extract_tag(text: &str, tag: ClaudeCommandTag) -> Option<&str> {
371            let start = text.find(tag.open())?.saturating_add(tag.open().len());
372            let tail = text.get(start..)?;
373            let end = tail.find(tag.close())?.saturating_add(start);
374            text.get(start..end)
375        }
376
377        let name = extract_tag(text, ClaudeCommandTag::Name)
378            .map(str::trim)
379            .filter(|name| !name.is_empty())?
380            .to_owned();
381        let args = extract_tag(text, ClaudeCommandTag::Args)
382            .map(str::trim)
383            .filter(|args| !args.is_empty())
384            .map(str::to_owned);
385
386        Some(Self { name, args })
387    }
388
389    fn preview(&self) -> String {
390        let mut preview = self.name.clone();
391        if let Some(command_args) = self.args.as_deref().map(str::trim).filter(|args| !args.is_empty()) {
392            preview.push(' ');
393            preview.push_str(command_args);
394        }
395        preview
396    }
397}
398
#[cfg(test)]
mod tests {
    use super::*;

    /// Metadata comes from the first line that carries it; with no user message the session
    /// name falls back to the last component of the workspace path.
    #[test]
    fn test_parse_claude_session_from_jsonl_lines_sets_workspace_and_id() {
        // A fixed path is used instead of a temp directory: `parse` never touches the
        // filesystem, and splicing a real OS path into JSON unescaped breaks on paths that
        // contain backslashes.
        let workspace = PathBuf::from("/tmp/workspace");

        let content = concat!(
            "{\"type\":\"file-history-snapshot\",\"messageId\":\"m1\",\"snapshot\":{},\"isSnapshotUpdate\":false}\n",
            "{\"type\":\"progress\",\"timestamp\":\"2026-03-26T16:51:01.119Z\",\"cwd\":\"/tmp/workspace\",\"sessionId\":\"8649a076-3ead-4d5a-9840-3200f0e1aae5\"}\n",
            "{\"type\":\"last-prompt\",\"sessionId\":\"8649a076-3ead-4d5a-9840-3200f0e1aae5\",\"lastPrompt\":\"hello\"}\n"
        );

        assert2::assert!(let Ok(claude_session) = parse(content));
        let session = claude_session.into_session(workspace.join("session.jsonl"));
        pretty_assertions::assert_eq!(session.agent, Agent::Claude);
        pretty_assertions::assert_eq!(session.workspace, workspace);
        pretty_assertions::assert_eq!(session.id, "8649a076-3ead-4d5a-9840-3200f0e1aae5");
        pretty_assertions::assert_eq!(session.name, "workspace");
        pretty_assertions::assert_eq!(session.search_text, "workspace");
    }

    /// A line of a known type with an invalid timestamp fails the whole parse.
    #[test]
    fn test_parse_claude_session_with_invalid_scanned_line_returns_error() {
        let content = "{\"type\":\"progress\",\"timestamp\":\"not-a-date\",\"cwd\":\"/tmp/workspace\",\"sessionId\":\"8649a076-3ead-4d5a-9840-3200f0e1aae5\"}\n";

        assert2::assert!(let Err(err) = parse(content));
        assert!(err.to_string().contains("failed to parse Claude session json line"));
    }

    /// Lines of unrecognised types carry no metadata, so the session cannot be built.
    #[test]
    fn test_parse_claude_session_without_metadata_returns_error() {
        let content = "{\"type\":\"last-prompt\",\"sessionId\":\"8649a076-3ead-4d5a-9840-3200f0e1aae5\",\"lastPrompt\":\"hello\"}\n";
        assert2::assert!(let Err(err) = parse(content));
        assert!(err.to_string().contains("no Claude session record found"));
    }

    /// User text and assistant text parts are indexed; thinking and tool-use parts are not.
    #[test]
    fn test_parse_claude_session_indexes_user_and_assistant_text() {
        let content = concat!(
            "{\"type\":\"progress\",\"timestamp\":\"2026-03-26T16:51:01.119Z\",\"cwd\":\"/tmp/workspace\",\"sessionId\":\"8649a076-3ead-4d5a-9840-3200f0e1aae5\"}\n",
            "{\"type\":\"user\",\"message\":{\"role\":\"user\",\"content\":\"this is a very long first user message\"},\"timestamp\":\"2026-03-26T16:52:02.119Z\",\"cwd\":\"/tmp/workspace\",\"sessionId\":\"8649a076-3ead-4d5a-9840-3200f0e1aae5\"}\n",
            "{\"type\":\"assistant\",\"message\":{\"role\":\"assistant\",\"content\":[{\"type\":\"thinking\",\"thinking\":\"hidden\"},{\"type\":\"text\",\"text\":\"assistant answer\"},{\"type\":\"tool_use\",\"name\":\"Read\"}]},\"timestamp\":\"2026-03-26T16:53:02.119Z\",\"cwd\":\"/tmp/workspace\",\"sessionId\":\"8649a076-3ead-4d5a-9840-3200f0e1aae5\"}\n"
        );

        assert2::assert!(let Ok(claude_session) = parse(content));
        let session = claude_session.into_session(PathBuf::from("session.jsonl"));
        pretty_assertions::assert_eq!(session.name, "this is a very long first user message");
        pretty_assertions::assert_eq!(
            session.search_text,
            "this is a very long first user message assistant answer"
        );
        pretty_assertions::assert_eq!(
            session.updated_at,
            chrono::DateTime::parse_from_rfc3339("2026-03-26T16:53:02.119Z")
                .unwrap()
                .to_utc()
        );
    }

    /// A command wrapper is reduced to "<name> <args>" for both the name and the search text.
    #[test]
    fn test_parse_claude_session_with_command_wrapper_sets_command_preview() {
        let content = concat!(
            "{\"type\":\"progress\",\"timestamp\":\"2026-03-26T16:51:01.119Z\",\"cwd\":\"/tmp/workspace\",\"sessionId\":\"8649a076-3ead-4d5a-9840-3200f0e1aae5\"}\n",
            "{\"type\":\"user\",\"message\":{\"role\":\"user\",\"content\":\"<command-message>privoly-admin</command-message>\\n<command-name>/privoly-admin</command-name>\\n<command-args>install</command-args>\"},\"timestamp\":\"2026-03-26T16:52:02.119Z\",\"cwd\":\"/tmp/workspace\",\"sessionId\":\"8649a076-3ead-4d5a-9840-3200f0e1aae5\"}\n"
        );

        assert2::assert!(let Ok(claude_session) = parse(content));
        let session = claude_session.into_session(PathBuf::from("session.jsonl"));
        pretty_assertions::assert_eq!(session.name, "/privoly-admin install");
        pretty_assertions::assert_eq!(session.search_text, "/privoly-admin install");
    }

    /// Meta user rows and rows holding only tool results never name or index the session.
    #[test]
    fn test_parse_claude_session_skips_meta_and_tool_result_only_user_rows() {
        let content = concat!(
            "{\"type\":\"progress\",\"timestamp\":\"2026-03-26T16:51:01.119Z\",\"cwd\":\"/tmp/workspace\",\"sessionId\":\"8649a076-3ead-4d5a-9840-3200f0e1aae5\"}\n",
            "{\"type\":\"user\",\"isMeta\":true,\"message\":{\"role\":\"user\",\"content\":\"<local-command-caveat>ignore me</local-command-caveat>\"},\"timestamp\":\"2026-03-26T16:52:02.119Z\",\"cwd\":\"/tmp/workspace\",\"sessionId\":\"8649a076-3ead-4d5a-9840-3200f0e1aae5\"}\n",
            "{\"type\":\"user\",\"message\":{\"role\":\"user\",\"content\":[{\"type\":\"tool_result\",\"content\":\"ignored\"}]},\"timestamp\":\"2026-03-26T16:53:02.119Z\",\"cwd\":\"/tmp/workspace\",\"sessionId\":\"8649a076-3ead-4d5a-9840-3200f0e1aae5\"}\n",
            "{\"type\":\"user\",\"message\":{\"role\":\"user\",\"content\":\"real prompt\"},\"timestamp\":\"2026-03-26T16:54:02.119Z\",\"cwd\":\"/tmp/workspace\",\"sessionId\":\"8649a076-3ead-4d5a-9840-3200f0e1aae5\"}\n"
        );

        assert2::assert!(let Ok(claude_session) = parse(content));
        let session = claude_session.into_session(PathBuf::from("session.jsonl"));
        pretty_assertions::assert_eq!(session.name, "real prompt");
        pretty_assertions::assert_eq!(session.search_text, "real prompt");
    }

    /// A text part that follows a tool result is still picked up.
    #[test]
    fn test_user_content_search_text_with_tool_result_then_text_returns_text() {
        let value = serde_json::from_str::<ClaudeUserContent>(
            r#"[{"type":"tool_result","content":"ignored"},{"type":"text","text":"later text"}]"#,
        )
        .unwrap();

        pretty_assertions::assert_eq!(value.search_text(), Some("later text".to_owned()));
    }

    /// Command wrappers inside a text part are modelled as a command invocation.
    #[test]
    fn test_deserialize_claude_user_content_part_text_with_command_wrapper_models_command() {
        let value = serde_json::from_str::<ClaudeUserContent>(
            r#"[{"type":"text","text":"<command-message>privoly-admin</command-message>\n<command-name>/privoly-admin</command-name>\n<command-args>install</command-args>"}]"#,
        )
        .unwrap();

        pretty_assertions::assert_eq!(
            value,
            ClaudeUserContent::Parts(vec![ClaudeUserContentPart::Text {
                text: ClaudeUserText::Cmd(ClaudeCmdInvocation {
                    name: "/privoly-admin".to_owned(),
                    args: Some("install".to_owned()),
                }),
            }])
        );
    }
}