From e2b2f8efca61dda64010ce9ce017b6cf833324c0 Mon Sep 17 00:00:00 2001 From: Faerbit Date: Mon, 22 Jul 2024 00:16:11 +0200 Subject: [PATCH] [bencode] Decode lists --- src/bencode.rs | 261 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 227 insertions(+), 34 deletions(-) diff --git a/src/bencode.rs b/src/bencode.rs index 89ba5d2..254d74d 100644 --- a/src/bencode.rs +++ b/src/bencode.rs @@ -1,6 +1,6 @@ use std::collections::HashMap; -use anyhow::{anyhow, Result}; +use anyhow::{anyhow, ensure, Result}; /*struct FileInfo { length: i64, @@ -52,12 +52,15 @@ enum BencodeType { impl Bencode { pub fn decode(input: &str) -> Result { - Self::decode_type(input.as_bytes()) + let (result, end_pos) = Self::decode_type(input.as_bytes())?; + ensure!(end_pos == input.len() - 1, + "Could not fully decode input. Got {} chars left to decode", input.len() - 1 - end_pos); + Ok(result) } - fn decode_type(input: &[u8]) -> Result{ + fn decode_type(input: &[u8]) -> Result<(Bencode, usize)>{ if input.len() == 0 { - return Err(anyhow!("Empty string is not valid bencode")); + return Err(anyhow!("Empty string is not valid bencode")) } let char = input[0] as char; let type_ = match char { @@ -65,33 +68,50 @@ impl Bencode { 'l' => Ok(BencodeType::List), 'd' => Ok(BencodeType::Dict), '0'..='9' => Ok(BencodeType::Bytes), - _ => Err(anyhow!("Unknown bencoding char: {char}")), + _ => Err(anyhow!("Invalid bencoding start char: {char}")), }?; - let end_char = match type_ { - BencodeType::Integer | BencodeType::List | BencodeType::Dict=> 'e', - BencodeType::Bytes => ':' - }; - let mut end_pos= 1; - for &c in &input[1..] { - if c == end_char as u8 { - break - } - end_pos += 1; - } - let to_decode = match type_ { - BencodeType::Integer | BencodeType::List | BencodeType::Dict=> &input[1..end_pos], - BencodeType::Bytes => { - let bytes_len = Self::decode_int_only(&input[0..end_pos])? as usize; - let bytes_start= end_pos + 1; - &input[bytes_start..bytes_start + bytes_len] + let end_to_decode = match type_ { + BencodeType::List | BencodeType::Dict => { + input.len() }, + BencodeType::Integer | BencodeType::Bytes => { + let end_char = match type_ { + BencodeType::Integer => Ok('e'), + BencodeType::Bytes => Ok(':'), + _ => Err(anyhow!("Should be logically impossible")) + }?; + let result = input.iter().position(|&x| x == end_char as u8).ok_or_else(|| anyhow!("Could not find terminating character {end_char}"))?; + result + } + }; + let (to_decode, end_decoded) = match type_ { + BencodeType::Integer => (&input[1..end_to_decode], end_to_decode), + BencodeType::Bytes => { + let bytes_len = Self::decode_int_only(&input[0..end_to_decode])? as usize; + let bytes_start= end_to_decode + 1; + let end_pos = bytes_start + bytes_len; + (&input[bytes_start..end_pos], end_pos - 1) + }, + BencodeType::List | BencodeType::Dict => (&input[1..end_to_decode], 0), }; match type_ { - BencodeType::Integer => Self::decode_int(to_decode), - BencodeType::List => Self::dummy(to_decode), - BencodeType::Dict => Self::dummy(to_decode), - BencodeType::Bytes => Self::decode_bytes(to_decode), + BencodeType::Integer => { + let result = Self::decode_int(to_decode)?; + Ok((result, end_decoded)) + }, + BencodeType::Bytes => { + let result = Self::decode_bytes(to_decode)?; + Ok((result, end_decoded)) + }, + BencodeType::List => { + let (result, end_pos) = Self::decode_list(to_decode)?; + Ok((result, end_pos + 1)) + }, + BencodeType::Dict => { + let result = Self::dummy(to_decode)?; + Ok((result, end_decoded)) + }, } } @@ -109,6 +129,23 @@ impl Bencode { Ok(Bencode::Bytes(ByteString::from_slice(input))) } + fn decode_list(input: &[u8]) -> Result<(Bencode, usize)> { + let mut result = Vec::new(); + let mut decoded_pos = 0; + loop { + if decoded_pos >= input.len() { + return Err(anyhow!("Unfinished list. Could not find terminating character 'e'")) + } + if input[decoded_pos] == 'e' as u8 { + break; + } + let (li_result, end_pos ) = Self::decode_type(&input[decoded_pos..])?; + result.push(li_result); + decoded_pos += end_pos + 1; + } + Ok((Bencode::List(result), decoded_pos)) + } + fn dummy(_input: &[u8]) -> Result { Ok(Bencode::Integer(42)) } @@ -146,16 +183,172 @@ mod tests { assert_eq!(Bencode::decode_bytes("💩".as_bytes()).unwrap(), Bencode::Bytes(ByteString::from_str("💩"))); } + fn test_encode_bytes(input: &str) -> String { + format!("{}:{input}", input.len()) + } + #[test] fn test_bytes_str() { - let str = "hallo"; - assert_eq!(Bencode::decode(format!("{}:{str}", str.len()).as_str()).unwrap(), Bencode::Bytes(ByteString::from_str("hallo"))); - let str = "tschüss"; - assert_eq!(Bencode::decode(format!("{}:{str}", str.len()).as_str()).unwrap(), Bencode::Bytes(ByteString::from_str("tschüss"))); - let str = "💩"; - assert_eq!(Bencode::decode(format!("{}:{str}", str.len()).as_str()).unwrap(), Bencode::Bytes(ByteString::from_str("💩"))); - let str = "hallo 💩, this is a long text"; - assert_eq!(Bencode::decode(format!("{}:{str}", str.len()).as_str()).unwrap(), Bencode::Bytes(ByteString::from_str("hallo 💩, this is a long text"))); + assert_eq!(Bencode::decode(test_encode_bytes("hallo").as_str()).unwrap(), Bencode::Bytes(ByteString::from_str("hallo"))); + assert_eq!(Bencode::decode(test_encode_bytes("tschüss").as_str()).unwrap(), Bencode::Bytes(ByteString::from_str("tschüss"))); + assert_eq!(Bencode::decode(test_encode_bytes("💩").as_str()).unwrap(), Bencode::Bytes(ByteString::from_str("💩"))); + assert_eq!(Bencode::decode(test_encode_bytes("hallo 💩, this is a long text").as_str()).unwrap(), Bencode::Bytes(ByteString::from_str("hallo 💩, this is a long text"))); + } + + #[test] + fn test_list() { + assert_eq!(Bencode::decode_list( + ("e").as_bytes()).unwrap(), + (Bencode::List(Vec::new()), 0) + ); + let str = "i42ee"; + assert_eq!(Bencode::decode_list( + str.as_bytes()).unwrap(), + ( + Bencode::List(vec![ + Bencode::Integer(42), + ]), + str.len() - 1, + ) + ); + let str = format!("{}e", test_encode_bytes("hallo")); + assert_eq!(Bencode::decode_list( + str.as_bytes()).unwrap(), + ( + Bencode::List(vec![ + Bencode::Bytes(ByteString::from_str("hallo")), + ]), + str.len() - 1, + ) + ); + let str = format!("{}{}e", test_encode_bytes("hallo"), "i42e"); + assert_eq!(Bencode::decode_list( + str.as_bytes()).unwrap(), + ( + Bencode::List(vec![ + Bencode::Bytes(ByteString::from_str("hallo")), + Bencode::Integer(42), + ]), + str.len() - 1, + ) + ); + let str = format!("{}{}{}{}e", "i-17e", test_encode_bytes("hallo"), "i42e", test_encode_bytes("tschüssi💩")); + assert_eq!(Bencode::decode_list( + str.as_bytes()).unwrap(), + ( + Bencode::List(vec![ + Bencode::Integer(-17), + Bencode::Bytes(ByteString::from_str("hallo")), + Bencode::Integer(42), + Bencode::Bytes(ByteString::from_str("tschüssi💩")), + ]), + str.len() - 1, + ) + ); + } + + #[test] + fn test_multi_list() { + assert_eq!(Bencode::decode_list( + ("lelee").as_bytes()).unwrap(), + (Bencode::List(vec![ + Bencode::List(Vec::new()), + Bencode::List(Vec::new()), + ]), + 4) + ); + let str = format!("l{}{}ee", test_encode_bytes("hallo"), "i42e"); + assert_eq!(Bencode::decode_list( + str.as_bytes()).unwrap(), + (Bencode::List(vec![ + Bencode::List(vec![ + Bencode::Bytes(ByteString::from_str("hallo")), + Bencode::Integer(42), + ]), + ]), + str.len() - 1) + ); + let str = format!("l{}{}el{}{}{}ee", test_encode_bytes("hallo"), "i42e", "i17e", test_encode_bytes("tschüss💩"), "i33e"); + assert_eq!(Bencode::decode_list( + str.as_bytes()).unwrap(), + (Bencode::List(vec![ + Bencode::List(vec![ + Bencode::Bytes(ByteString::from_str("hallo")), + Bencode::Integer(42), + ]), + Bencode::List(vec![ + Bencode::Integer(17), + Bencode::Bytes(ByteString::from_str("tschüss💩")), + Bencode::Integer(33), + ]), + ]), + str.len() - 1) + ); + } + + #[test] + fn test_list_str() { + assert_eq!(Bencode::decode( + "le").unwrap(), + Bencode::List(Vec::new()) + ); + assert_eq!(Bencode::decode( + "li42ee").unwrap(), + Bencode::List(vec![ + Bencode::Integer(42), + ]) + ); + assert_eq!(Bencode::decode( + (format!("l{}e", test_encode_bytes("hallo"))).as_str()).unwrap(), + Bencode::List(vec![ + Bencode::Bytes(ByteString::from_str("hallo")), + ]) + ); + assert_eq!(Bencode::decode( + (format!("l{}{}e", test_encode_bytes("hallo"), "i42e")).as_str()).unwrap(), + Bencode::List(vec![ + Bencode::Bytes(ByteString::from_str("hallo")), + Bencode::Integer(42), + ]) + ); + assert_eq!(Bencode::decode( + (format!("l{}{}{}{}e", "i-17e", test_encode_bytes("hallo"), "i42e", test_encode_bytes("tschüssi💩"))).as_str()).unwrap(), + Bencode::List(vec![ + Bencode::Integer(-17), + Bencode::Bytes(ByteString::from_str("hallo")), + Bencode::Integer(42), + Bencode::Bytes(ByteString::from_str("tschüssi💩")), + ]) + ); + assert_eq!(Bencode::decode("llelee").unwrap(), + Bencode::List(vec![ + Bencode::List(Vec::new()), + Bencode::List(Vec::new()), + ]), + ); + let str = format!("ll{}{}ee", test_encode_bytes("hallo"), "i42e"); + assert_eq!(Bencode::decode(&str).unwrap(), + Bencode::List(vec![ + Bencode::List(vec![ + Bencode::Bytes(ByteString::from_str("hallo")), + Bencode::Integer(42), + ]), + ]), + ); + let str = format!("ll{}{}el{}{}{}ee", test_encode_bytes("hallo"), "i42e", "i17e", test_encode_bytes("tschüss💩"), "i33e"); + assert_eq!(Bencode::decode(&str).unwrap(), + Bencode::List(vec![ + Bencode::List(vec![ + Bencode::Bytes(ByteString::from_str("hallo")), + Bencode::Integer(42), + ]), + Bencode::List(vec![ + Bencode::Integer(17), + Bencode::Bytes(ByteString::from_str("tschüss💩")), + Bencode::Integer(33), + ]), + ]), + ); } }