[bencode] Decode lists

This commit is contained in:
Fabian 2024-07-22 00:16:11 +02:00
parent 81017b3f90
commit e2b2f8efca

View File

@ -1,6 +1,6 @@
use std::collections::HashMap;
use anyhow::{anyhow, Result};
use anyhow::{anyhow, ensure, Result};
/*struct FileInfo {
length: i64,
@ -52,12 +52,15 @@ enum BencodeType {
impl Bencode {
/// Decodes `input` as a single bencoded value.
///
/// The bytes of `input` are handed to `decode_type`, which returns the decoded
/// value together with a position. That position must equal the index of the
/// last byte of `input` (`input.len() - 1`); otherwise an error reporting the
/// number of undecoded chars is returned.
///
/// NOTE(review): this span comes from a rendered diff. The bare
/// `Self::decode_type(input.as_bytes())` line looks like the pre-change body and
/// the lines after it like its replacement; confirm against the real file.
pub fn decode(input: &str) -> Result<Bencode> {
Self::decode_type(input.as_bytes())
let (result, end_pos) = Self::decode_type(input.as_bytes())?;
// Reject input that still has bytes after the decoded value.
ensure!(end_pos == input.len() - 1,
"Could not fully decode input. Got {} chars left to decode", input.len() - 1 - end_pos);
Ok(result)
}
fn decode_type(input: &[u8]) -> Result<Bencode>{
fn decode_type(input: &[u8]) -> Result<(Bencode, usize)>{
if input.len() == 0 {
return Err(anyhow!("Empty string is not valid bencode"));
return Err(anyhow!("Empty string is not valid bencode"))
}
let char = input[0] as char;
let type_ = match char {
@ -65,33 +68,50 @@ impl Bencode {
'l' => Ok(BencodeType::List),
'd' => Ok(BencodeType::Dict),
'0'..='9' => Ok(BencodeType::Bytes),
_ => Err(anyhow!("Unknown bencoding char: {char}")),
_ => Err(anyhow!("Invalid bencoding start char: {char}")),
}?;
let end_char = match type_ {
BencodeType::Integer | BencodeType::List | BencodeType::Dict=> 'e',
BencodeType::Bytes => ':'
};
let mut end_pos= 1;
for &c in &input[1..] {
if c == end_char as u8 {
break
}
end_pos += 1;
}
let to_decode = match type_ {
BencodeType::Integer | BencodeType::List | BencodeType::Dict=> &input[1..end_pos],
BencodeType::Bytes => {
let bytes_len = Self::decode_int_only(&input[0..end_pos])? as usize;
let bytes_start= end_pos + 1;
&input[bytes_start..bytes_start + bytes_len]
let end_to_decode = match type_ {
BencodeType::List | BencodeType::Dict => {
input.len()
},
BencodeType::Integer | BencodeType::Bytes => {
let end_char = match type_ {
BencodeType::Integer => Ok('e'),
BencodeType::Bytes => Ok(':'),
_ => Err(anyhow!("Should be logically impossible"))
}?;
let result = input.iter().position(|&x| x == end_char as u8).ok_or_else(|| anyhow!("Could not find terminating character {end_char}"))?;
result
}
};
let (to_decode, end_decoded) = match type_ {
BencodeType::Integer => (&input[1..end_to_decode], end_to_decode),
BencodeType::Bytes => {
let bytes_len = Self::decode_int_only(&input[0..end_to_decode])? as usize;
let bytes_start= end_to_decode + 1;
let end_pos = bytes_start + bytes_len;
(&input[bytes_start..end_pos], end_pos - 1)
},
BencodeType::List | BencodeType::Dict => (&input[1..end_to_decode], 0),
};
match type_ {
BencodeType::Integer => Self::decode_int(to_decode),
BencodeType::List => Self::dummy(to_decode),
BencodeType::Dict => Self::dummy(to_decode),
BencodeType::Bytes => Self::decode_bytes(to_decode),
BencodeType::Integer => {
let result = Self::decode_int(to_decode)?;
Ok((result, end_decoded))
},
BencodeType::Bytes => {
let result = Self::decode_bytes(to_decode)?;
Ok((result, end_decoded))
},
BencodeType::List => {
let (result, end_pos) = Self::decode_list(to_decode)?;
Ok((result, end_pos + 1))
},
BencodeType::Dict => {
let result = Self::dummy(to_decode)?;
Ok((result, end_decoded))
},
}
}
@ -109,6 +129,23 @@ impl Bencode {
Ok(Bencode::Bytes(ByteString::from_slice(input)))
}
/// Decodes the items of a bencoded list.
///
/// `input` must start at the first item of the list (the caller has already
/// consumed the leading `l`) and must contain the list's terminating `e`.
///
/// Returns the decoded [`Bencode::List`] together with the index, relative to
/// `input`, of the terminating `e`.
///
/// # Errors
///
/// Fails when `input` runs out before a terminating `e` is seen, or when
/// decoding any item fails.
fn decode_list(input: &[u8]) -> Result<(Bencode, usize)> {
    let mut items = Vec::new();
    let mut cursor = 0;
    // `get` yields `None` once the cursor walks off the end of the input,
    // which means the list was never terminated.
    while let Some(&byte) = input.get(cursor) {
        if byte == b'e' {
            return Ok((Bencode::List(items), cursor));
        }
        let (item, item_end) = Self::decode_type(&input[cursor..])?;
        items.push(item);
        // Step one past the position reported for the item just decoded.
        cursor += item_end + 1;
    }
    Err(anyhow!("Unfinished list. Could not find terminating character 'e'"))
}
/// Placeholder decoder: ignores `_input` and always yields `Bencode::Integer(42)`.
fn dummy(_input: &[u8]) -> Result<Bencode> {
    let placeholder = Bencode::Integer(42);
    Ok(placeholder)
}
@ -146,16 +183,172 @@ mod tests {
assert_eq!(Bencode::decode_bytes("💩".as_bytes()).unwrap(), Bencode::Bytes(ByteString::from_str("💩")));
}
/// Test helper: renders `input` as `<len>:<input>`, where `<len>` is the
/// length of `input` in bytes (not characters).
fn test_encode_bytes(input: &str) -> String {
    let byte_len = input.len();
    format!("{byte_len}:{input}")
}
// Checks that `Bencode::decode` turns `<len>:<text>` inputs into
// `Bencode::Bytes`, for ASCII text as well as multi-byte UTF-8 text.
//
// NOTE(review): this span comes from a rendered diff. The `let str = …` /
// `format!` pairs look like the replaced lines and the `test_encode_bytes`
// assertions like their replacements; confirm against the real file.
#[test]
fn test_bytes_str() {
let str = "hallo";
assert_eq!(Bencode::decode(format!("{}:{str}", str.len()).as_str()).unwrap(), Bencode::Bytes(ByteString::from_str("hallo")));
let str = "tschüss";
assert_eq!(Bencode::decode(format!("{}:{str}", str.len()).as_str()).unwrap(), Bencode::Bytes(ByteString::from_str("tschüss")));
let str = "💩";
assert_eq!(Bencode::decode(format!("{}:{str}", str.len()).as_str()).unwrap(), Bencode::Bytes(ByteString::from_str("💩")));
let str = "hallo 💩, this is a long text";
assert_eq!(Bencode::decode(format!("{}:{str}", str.len()).as_str()).unwrap(), Bencode::Bytes(ByteString::from_str("hallo 💩, this is a long text")));
assert_eq!(Bencode::decode(test_encode_bytes("hallo").as_str()).unwrap(), Bencode::Bytes(ByteString::from_str("hallo")));
assert_eq!(Bencode::decode(test_encode_bytes("tschüss").as_str()).unwrap(), Bencode::Bytes(ByteString::from_str("tschüss")));
assert_eq!(Bencode::decode(test_encode_bytes("💩").as_str()).unwrap(), Bencode::Bytes(ByteString::from_str("💩")));
assert_eq!(Bencode::decode(test_encode_bytes("hallo 💩, this is a long text").as_str()).unwrap(), Bencode::Bytes(ByteString::from_str("hallo 💩, this is a long text")));
}
// `decode_list` on flat list bodies (the leading `l` is already stripped).
// Each case expects the decoded items plus the index of the terminating `e`,
// which is the last byte of the input.
#[test]
fn test_list() {
    // Empty list body: only the terminator, found at index 0.
    let decoded = Bencode::decode_list("e".as_bytes()).unwrap();
    assert_eq!(decoded, (Bencode::List(Vec::new()), 0));

    // A single integer item.
    let encoded = "i42ee";
    let decoded = Bencode::decode_list(encoded.as_bytes()).unwrap();
    let expected = (
        Bencode::List(vec![Bencode::Integer(42)]),
        encoded.len() - 1,
    );
    assert_eq!(decoded, expected);

    let hallo = test_encode_bytes("hallo");

    // A single byte-string item.
    let encoded = [hallo.as_str(), "e"].concat();
    let decoded = Bencode::decode_list(encoded.as_bytes()).unwrap();
    let expected = (
        Bencode::List(vec![Bencode::Bytes(ByteString::from_str("hallo"))]),
        encoded.len() - 1,
    );
    assert_eq!(decoded, expected);

    // A byte string followed by an integer.
    let encoded = [hallo.as_str(), "i42e", "e"].concat();
    let decoded = Bencode::decode_list(encoded.as_bytes()).unwrap();
    let expected = (
        Bencode::List(vec![
            Bencode::Bytes(ByteString::from_str("hallo")),
            Bencode::Integer(42),
        ]),
        encoded.len() - 1,
    );
    assert_eq!(decoded, expected);

    // Integers and byte strings mixed, including a negative integer and
    // multi-byte UTF-8 text.
    let tschuessi = test_encode_bytes("tschüssi💩");
    let encoded = ["i-17e", hallo.as_str(), "i42e", tschuessi.as_str(), "e"].concat();
    let decoded = Bencode::decode_list(encoded.as_bytes()).unwrap();
    let expected = (
        Bencode::List(vec![
            Bencode::Integer(-17),
            Bencode::Bytes(ByteString::from_str("hallo")),
            Bencode::Integer(42),
            Bencode::Bytes(ByteString::from_str("tschüssi💩")),
        ]),
        encoded.len() - 1,
    );
    assert_eq!(decoded, expected);
}
// `decode_list` on list bodies whose items are themselves lists.
#[test]
fn test_multi_list() {
    // Two empty inner lists; the outer terminator sits at index 4.
    let decoded = Bencode::decode_list("lelee".as_bytes()).unwrap();
    let expected = (
        Bencode::List(vec![
            Bencode::List(Vec::new()),
            Bencode::List(Vec::new()),
        ]),
        4,
    );
    assert_eq!(decoded, expected);

    let hallo = test_encode_bytes("hallo");

    // One inner list holding a byte string and an integer.
    let encoded = ["l", hallo.as_str(), "i42e", "e", "e"].concat();
    let decoded = Bencode::decode_list(encoded.as_bytes()).unwrap();
    let expected = (
        Bencode::List(vec![Bencode::List(vec![
            Bencode::Bytes(ByteString::from_str("hallo")),
            Bencode::Integer(42),
        ])]),
        encoded.len() - 1,
    );
    assert_eq!(decoded, expected);

    // Two non-empty inner lists back to back.
    let tschuess = test_encode_bytes("tschüss💩");
    let encoded = [
        "l", hallo.as_str(), "i42e", "e",
        "l", "i17e", tschuess.as_str(), "i33e", "e",
        "e",
    ]
    .concat();
    let decoded = Bencode::decode_list(encoded.as_bytes()).unwrap();
    let expected = (
        Bencode::List(vec![
            Bencode::List(vec![
                Bencode::Bytes(ByteString::from_str("hallo")),
                Bencode::Integer(42),
            ]),
            Bencode::List(vec![
                Bencode::Integer(17),
                Bencode::Bytes(ByteString::from_str("tschüss💩")),
                Bencode::Integer(33),
            ]),
        ]),
        encoded.len() - 1,
    );
    assert_eq!(decoded, expected);
}
// End-to-end list decoding through the public `Bencode::decode` entry point,
// covering flat lists first and nested lists afterwards.
#[test]
fn test_list_str() {
    let hallo = test_encode_bytes("hallo");
    let tschuessi = test_encode_bytes("tschüssi💩");
    let tschuess = test_encode_bytes("tschüss💩");

    // Empty list.
    let decoded = Bencode::decode("le").unwrap();
    assert_eq!(decoded, Bencode::List(Vec::new()));

    // Single integer.
    let decoded = Bencode::decode("li42ee").unwrap();
    assert_eq!(decoded, Bencode::List(vec![Bencode::Integer(42)]));

    // Single byte string.
    let encoded = ["l", hallo.as_str(), "e"].concat();
    let decoded = Bencode::decode(encoded.as_str()).unwrap();
    let expected = Bencode::List(vec![Bencode::Bytes(ByteString::from_str("hallo"))]);
    assert_eq!(decoded, expected);

    // Byte string followed by an integer.
    let encoded = ["l", hallo.as_str(), "i42e", "e"].concat();
    let decoded = Bencode::decode(encoded.as_str()).unwrap();
    let expected = Bencode::List(vec![
        Bencode::Bytes(ByteString::from_str("hallo")),
        Bencode::Integer(42),
    ]);
    assert_eq!(decoded, expected);

    // Mixed integers and byte strings.
    let encoded = ["l", "i-17e", hallo.as_str(), "i42e", tschuessi.as_str(), "e"].concat();
    let decoded = Bencode::decode(encoded.as_str()).unwrap();
    let expected = Bencode::List(vec![
        Bencode::Integer(-17),
        Bencode::Bytes(ByteString::from_str("hallo")),
        Bencode::Integer(42),
        Bencode::Bytes(ByteString::from_str("tschüssi💩")),
    ]);
    assert_eq!(decoded, expected);

    // Two empty inner lists.
    let decoded = Bencode::decode("llelee").unwrap();
    let expected = Bencode::List(vec![
        Bencode::List(Vec::new()),
        Bencode::List(Vec::new()),
    ]);
    assert_eq!(decoded, expected);

    // One non-empty inner list.
    let encoded = ["l", "l", hallo.as_str(), "i42e", "e", "e"].concat();
    let decoded = Bencode::decode(&encoded).unwrap();
    let expected = Bencode::List(vec![Bencode::List(vec![
        Bencode::Bytes(ByteString::from_str("hallo")),
        Bencode::Integer(42),
    ])]);
    assert_eq!(decoded, expected);

    // Two non-empty inner lists.
    let encoded = [
        "l",
        "l", hallo.as_str(), "i42e", "e",
        "l", "i17e", tschuess.as_str(), "i33e", "e",
        "e",
    ]
    .concat();
    let decoded = Bencode::decode(&encoded).unwrap();
    let expected = Bencode::List(vec![
        Bencode::List(vec![
            Bencode::Bytes(ByteString::from_str("hallo")),
            Bencode::Integer(42),
        ]),
        Bencode::List(vec![
            Bencode::Integer(17),
            Bencode::Bytes(ByteString::from_str("tschüss💩")),
            Bencode::Integer(33),
        ]),
    ]);
    assert_eq!(decoded, expected);
}
}