// ytil_agents/agent/session_parser/codex.rs

1use std::path::PathBuf;
2
3use chrono::DateTime;
4use chrono::Utc;
5use rootcause::option_ext::OptionExt;
6use rootcause::prelude::ResultExt;
7use serde::Deserialize;
8use serde::de::IgnoredAny;
9
10use crate::agent::Agent;
11use crate::agent::session::SearchTextBuilder;
12use crate::agent::session::Session;
13
14/// Parse one Codex JSONL session file.
15///
16/// # Errors
17/// Returns an error when the JSONL cannot be parsed or required session metadata is missing.
18pub fn parse(content: &str, session_name: &str) -> rootcause::Result<CodexSession> {
19    let mut session_id = None;
20    let mut workspace_dir = None;
21    let mut created_at = None;
22    let mut updated_at = None;
23    let mut first_user_message = None;
24    let mut is_subagent = false;
25    let mut search_text = SearchTextBuilder::default();
26
27    for (line_idx, line) in content.lines().enumerate() {
28        let line = serde_json::from_str::<CodexLine>(line)
29            .context("failed to parse Codex session json line".to_owned())
30            .attach(format!("line_number={}", line_idx.saturating_add(1)))
31            .attach(format!("line={line}"))?;
32
33        if let Some(timestamp) = line.timestamp() {
34            updated_at = Some(timestamp);
35        }
36
37        if let Some(meta) = line.session_meta() {
38            session_id.get_or_insert_with(|| meta.id.clone());
39            workspace_dir.get_or_insert_with(|| PathBuf::from(&meta.cwd));
40            created_at.get_or_insert(meta.timestamp);
41            is_subagent |= meta.is_subagent();
42        }
43
44        if let Some(user_message) = line.user_search_text() {
45            if first_user_message.is_none() {
46                first_user_message = Some(user_message.clone());
47            }
48            search_text.push(&user_message);
49        }
50        if let Some(assistant_message) = line.assistant_search_text() {
51            search_text.push(&assistant_message);
52        }
53    }
54
55    let session_id = session_id
56        .context("no Codex session_meta record found".to_owned())
57        .attach(format!("session_name={session_name}"))?;
58    let workspace_dir = workspace_dir
59        .context("no Codex session_meta record found".to_owned())
60        .attach(format!("session_name={session_name}"))?;
61    let created_at = created_at
62        .context("no Codex session_meta record found".to_owned())
63        .attach(format!("session_name={session_name}"))?;
64
65    let name = first_user_message.unwrap_or_else(|| session_name.to_owned());
66    let search_text = search_text.build(&name);
67
68    Ok(CodexSession {
69        id: session_id,
70        name,
71        search_text,
72        workspace: workspace_dir,
73        created_at,
74        updated_at: updated_at.unwrap_or(created_at),
75        is_subagent,
76    })
77}
78
79#[derive(Clone, Debug, Eq, PartialEq)]
80pub struct CodexSession {
81    pub id: String,
82    pub name: String,
83    pub search_text: String,
84    pub workspace: PathBuf,
85    pub created_at: DateTime<Utc>,
86    pub updated_at: DateTime<Utc>,
87    pub is_subagent: bool,
88}
89
90impl CodexSession {
91    pub fn into_session(self, path: PathBuf) -> Session {
92        let mut session = Session::new(Agent::Codex, self.id, self.workspace, path, None, self.created_at);
93        session.name = self.name;
94        session.search_text = self.search_text;
95        session.updated_at = self.updated_at;
96        session
97    }
98}
99
100#[derive(Debug, Deserialize)]
101#[serde(tag = "type")]
102enum CodexLine {
103    #[serde(rename = "session_meta")]
104    SessionMeta(CodexSessionMetaLine),
105    #[serde(rename = "event_msg")]
106    EventMsg(CodexEventMsgLine),
107    #[serde(rename = "response_item")]
108    ResponseItem(CodexResponseItemLine),
109    #[serde(alias = "turn_context")]
110    #[serde(alias = "compacted")]
111    Timestamped(CodexTimestampedLine),
112    #[serde(other)]
113    Other,
114}
115
116impl CodexLine {
117    const fn timestamp(&self) -> Option<DateTime<Utc>> {
118        match self {
119            Self::SessionMeta(line) => Some(line.timestamp),
120            Self::EventMsg(line) => Some(line.timestamp),
121            Self::ResponseItem(line) => Some(line.timestamp),
122            Self::Timestamped(line) => Some(line.timestamp),
123            Self::Other => None,
124        }
125    }
126
127    const fn session_meta(&self) -> Option<&CodexSessionMetaPayload> {
128        match self {
129            Self::SessionMeta(line) => Some(&line.payload),
130            Self::EventMsg(_) | Self::ResponseItem(_) | Self::Timestamped(_) | Self::Other => None,
131        }
132    }
133
134    fn user_search_text(&self) -> Option<String> {
135        match self {
136            Self::EventMsg(line) => line.user_search_text(),
137            Self::SessionMeta(_) | Self::ResponseItem(_) | Self::Timestamped(_) | Self::Other => None,
138        }
139    }
140
141    fn assistant_search_text(&self) -> Option<String> {
142        match self {
143            Self::ResponseItem(line) => line.assistant_search_text(),
144            Self::SessionMeta(_) | Self::EventMsg(_) | Self::Timestamped(_) | Self::Other => None,
145        }
146    }
147}
148
149#[derive(Debug, Deserialize)]
150struct CodexSessionMetaLine {
151    #[serde(rename = "timestamp")]
152    timestamp: DateTime<Utc>,
153    payload: CodexSessionMetaPayload,
154}
155
156#[derive(Debug, Deserialize)]
157struct CodexSessionMetaPayload {
158    id: String,
159    cwd: String,
160    timestamp: DateTime<Utc>,
161    source: Option<serde_json::Value>,
162}
163
164impl CodexSessionMetaPayload {
165    fn is_subagent(&self) -> bool {
166        self.source
167            .as_ref()
168            .is_some_and(|source| source.get("subagent").is_some())
169    }
170}
171
172#[derive(Debug, Deserialize)]
173struct CodexEventMsgLine {
174    #[serde(rename = "timestamp")]
175    timestamp: DateTime<Utc>,
176    payload: CodexEventPayload,
177}
178
179impl CodexEventMsgLine {
180    fn user_search_text(&self) -> Option<String> {
181        match &self.payload {
182            CodexEventPayload::UserMessage { message } => Some(message.clone()),
183            CodexEventPayload::Other => None,
184        }
185    }
186}
187
188#[derive(Debug, Deserialize)]
189#[serde(tag = "type")]
190enum CodexEventPayload {
191    #[serde(rename = "user_message")]
192    UserMessage { message: String },
193    #[serde(other)]
194    Other,
195}
196
197#[derive(Debug, Deserialize)]
198struct CodexResponseItemLine {
199    timestamp: DateTime<Utc>,
200    payload: CodexResponseItemPayload,
201}
202
203impl CodexResponseItemLine {
204    fn assistant_search_text(&self) -> Option<String> {
205        match &self.payload {
206            CodexResponseItemPayload::Message { role, content } if role == "assistant" => {
207                let mut search_text = SearchTextBuilder::default();
208                for snippet in content
209                    .iter()
210                    .filter_map(CodexMessageContentPart::assistant_search_text)
211                {
212                    search_text.push(snippet);
213                }
214                let message = search_text.build("");
215                (!message.is_empty()).then_some(message)
216            }
217            CodexResponseItemPayload::Message { .. }
218            | CodexResponseItemPayload::Reasoning
219            | CodexResponseItemPayload::Other => None,
220        }
221    }
222}
223
224#[derive(Debug, Deserialize)]
225#[serde(tag = "type")]
226enum CodexResponseItemPayload {
227    #[serde(rename = "message")]
228    Message {
229        role: String,
230        #[serde(default)]
231        content: Vec<CodexMessageContentPart>,
232    },
233    #[serde(rename = "reasoning")]
234    Reasoning,
235    #[serde(other)]
236    Other,
237}
238
239#[derive(Debug, Deserialize)]
240#[serde(tag = "type")]
241enum CodexMessageContentPart {
242    #[serde(rename = "output_text")]
243    OutputText { text: String },
244    #[serde(rename = "input_text")]
245    InputText {
246        #[serde(rename = "text")]
247        _text: IgnoredAny,
248    },
249    #[serde(other)]
250    Other,
251}
252
253impl CodexMessageContentPart {
254    fn assistant_search_text(&self) -> Option<&str> {
255        match self {
256            Self::OutputText { text } => Some(text),
257            Self::InputText { .. } | Self::Other => None,
258        }
259    }
260}
261
262#[derive(Debug, Deserialize)]
263struct CodexTimestampedLine {
264    timestamp: DateTime<Utc>,
265}
266
#[cfg(test)]
mod tests {
    use tempfile::tempdir;

    use super::*;

    /// With no user message in the file, the caller-provided session name is used for both the
    /// name and the search text.
    #[test]
    fn test_parse_codex_session_from_session_meta_uses_session_name_fallback() {
        let scratch = tempdir().unwrap();
        let workspace = scratch.path().join("workspace");
        std::fs::create_dir_all(&workspace).unwrap();
        let session_name = "rollout-2026-03-20T07-30-20-019d09f0-0d96-7e23-94cd-1f6aad7cdc09";

        let content = format!(
            "{{\"timestamp\":\"2026-03-20T06:30:20.312Z\",\"type\":\"session_meta\",\"payload\":{{\"id\":\"019d09f0-0d96-7e23-94cd-1f6aad7cdc09\",\"timestamp\":\"2026-03-20T06:30:20.312Z\",\"cwd\":\"{}\",\"name\":\"Dotfiles\"}}}}\n",
            workspace.display()
        );

        assert2::assert!(let Ok(codex_session) = parse(&content, session_name));
        let session = codex_session.into_session(workspace.join("session.jsonl"));
        pretty_assertions::assert_eq!(session.agent, Agent::Codex);
        pretty_assertions::assert_eq!(session.name, session_name);
        pretty_assertions::assert_eq!(session.search_text, session_name);
        pretty_assertions::assert_eq!(session.workspace, workspace);
    }

    /// User and assistant text are indexed, reasoning and input text are not, and the last
    /// record's timestamp becomes `updated_at`.
    #[test]
    fn test_parse_codex_session_indexes_user_and_assistant_text_and_updated_at() {
        let content = concat!(
            "{\"timestamp\":\"2026-03-20T06:30:20.312Z\",\"type\":\"session_meta\",\"payload\":{\"id\":\"019d09f0-0d96-7e23-94cd-1f6aad7cdc09\",\"timestamp\":\"2026-03-20T06:30:20.312Z\",\"cwd\":\"/tmp/workspace\"}}\n",
            "{\"timestamp\":\"2026-03-20T06:31:20.312Z\",\"type\":\"event_msg\",\"payload\":{\"type\":\"user_message\",\"message\":\"why can't I jump with rust-analyzer to these types?\"}}\n",
            "{\"timestamp\":\"2026-03-20T06:32:20.312Z\",\"type\":\"response_item\",\"payload\":{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{\"type\":\"output_text\",\"text\":\"Because that symbol is re-exported.\"},{\"type\":\"input_text\",\"text\":\"ignored\"}]}}\n",
            "{\"timestamp\":\"2026-03-20T06:33:20.312Z\",\"type\":\"response_item\",\"payload\":{\"type\":\"reasoning\",\"text\":\"hidden\"}}\n"
        );
        let expected_updated_at = chrono::DateTime::parse_from_rfc3339("2026-03-20T06:33:20.312Z")
            .unwrap()
            .to_utc();

        assert2::assert!(let Ok(session) = parse(content, "fallback-name"));
        pretty_assertions::assert_eq!(session.name, "why can't I jump with rust-analyzer to these types?");
        pretty_assertions::assert_eq!(
            session.search_text,
            "why can't I jump with rust-analyzer to these types? Because that symbol is re-exported."
        );
        pretty_assertions::assert_eq!(session.updated_at, expected_updated_at);
    }

    /// `output_text` inside a message whose role is not `assistant` must not be indexed.
    #[test]
    fn test_parse_codex_session_ignores_non_assistant_response_text() {
        let content = concat!(
            "{\"timestamp\":\"2026-03-20T06:30:20.312Z\",\"type\":\"session_meta\",\"payload\":{\"id\":\"019d09f0-0d96-7e23-94cd-1f6aad7cdc09\",\"timestamp\":\"2026-03-20T06:30:20.312Z\",\"cwd\":\"/tmp/workspace\"}}\n",
            "{\"timestamp\":\"2026-03-20T06:31:20.312Z\",\"type\":\"event_msg\",\"payload\":{\"type\":\"user_message\",\"message\":\"first user msg\"}}\n",
            "{\"timestamp\":\"2026-03-20T06:32:20.312Z\",\"type\":\"response_item\",\"payload\":{\"type\":\"message\",\"role\":\"user\",\"content\":[{\"type\":\"output_text\",\"text\":\"should not index\"}]}}\n"
        );

        assert2::assert!(let Ok(session) = parse(content, "fallback-name"));
        pretty_assertions::assert_eq!(session.search_text, "first user msg");
    }

    /// A `source` object containing a `subagent` key flags the session as a subagent session.
    #[test]
    fn test_parse_codex_session_when_source_is_subagent_marks_session() {
        let content = concat!(
            "{\"timestamp\":\"2026-03-20T06:30:20.312Z\",\"type\":\"session_meta\",\"payload\":{\"id\":\"019d09f0-0d96-7e23-94cd-1f6aad7cdc09\",\"timestamp\":\"2026-03-20T06:30:20.312Z\",\"cwd\":\"/tmp/workspace\",\"source\":{\"subagent\":{\"other\":\"guardian\"}}}}\n",
            "{\"timestamp\":\"2026-03-20T06:31:20.312Z\",\"type\":\"event_msg\",\"payload\":{\"type\":\"user_message\",\"message\":\"The following is the Codex agent history\"}}\n"
        );

        assert2::assert!(let Ok(session) = parse(content, "fallback-name"));
        assert!(session.is_subagent);
    }

    /// A record that fails to deserialize (here: an invalid timestamp) aborts parsing.
    #[test]
    fn test_parse_codex_session_with_invalid_scanned_line_returns_error() {
        let content = "{\"timestamp\":\"not-a-date\",\"type\":\"session_meta\",\"payload\":{\"id\":\"019d09f0-0d96-7e23-94cd-1f6aad7cdc09\",\"timestamp\":\"2026-03-20T06:30:20.312Z\",\"cwd\":\"/tmp/workspace\"}}\n";

        assert2::assert!(let Err(err) = parse(content, "fallback-name"));
        let rendered = err.to_string();
        assert!(rendered.contains("failed to parse Codex session json line"));
    }
}