From 37c337c467b63c50dc0b28ee6b87e9736908decf Mon Sep 17 00:00:00 2001 From: Faerbit Date: Sat, 3 Aug 2024 18:10:05 +0200 Subject: [PATCH] Move stuff --- src/bencode.rs | 561 +----------------------- src/bencode/custom.rs | 559 +++++++++++++++++++++++ src/{serde_bencode.rs => bencode/de.rs} | 2 +- src/main.rs | 3 +- 4 files changed, 563 insertions(+), 562 deletions(-) create mode 100644 src/bencode/custom.rs rename src/{serde_bencode.rs => bencode/de.rs} (99%) diff --git a/src/bencode.rs b/src/bencode.rs index 3ec0578..1673fce 100644 --- a/src/bencode.rs +++ b/src/bencode.rs @@ -1,559 +1,2 @@ -use std::collections::HashMap; -use std::fmt::{Display, Formatter}; - -use anyhow::{anyhow, ensure, Result}; -use sha1::{Digest, Sha1}; - - -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)] -pub struct ByteString(Vec<u8>); - -impl Display for ByteString { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - if let Ok(str) = std::str::from_utf8(&*self.0) { - write!(f, "{}", str) - } else { - let str_ints: Vec<_> = self.0.iter().map(|x| x.to_string()).collect(); - write!(f, "{{{}}}", str_ints.join(", ")) - } - } -} - -impl ByteString { - - pub fn to_vec(&self) -> &Vec<u8> { - &self.0 - } - pub fn to_string(&self) -> Result<String> { - let result = std::str::from_utf8(&self.0[..])?; - Ok(result.to_string()) - } - #[allow(dead_code)] - pub fn from_str(input: &str) -> Self { - ByteString(input.as_bytes().to_vec()) - } - - pub fn from_slice(input: &[u8]) -> Self { - ByteString(input.to_vec()) - } -} - -#[derive(Debug, PartialEq)] -enum BencodeType { - Integer, - Bytes, - List, - Dict, -} - -#[derive(Debug, PartialEq)] -pub enum Bencode { - Integer(i64), - Bytes(ByteString), - List(Vec<Bencode>), - Dict(HashMap<ByteString, Bencode>), -} - -impl Display for Bencode { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - Bencode::Integer(i) => { write!(f, "{{i: {}}}", i) } - Bencode::Bytes(b) => { write!(f, "{{b: {}}}", b) } - Bencode::List(l) => { - let strs: Vec<_> = 
l.iter().map(|x| x.to_string()).collect(); - write!(f, "{{l: [\n{}\n]}}", strs.join(",\n")) - } - Bencode::Dict(d) => { - write!(f, "{{d: {{\n")?; - for (k, v) in d { - write!(f, " {k} : {v}\n")?; - } - write!(f, "\n}} }}") - } - } - - } -} - -impl Bencode { - - pub fn compose(&self) -> Vec<u8> { - match self { - Bencode::Integer(i) => {format!("i{}e", i).as_bytes().to_vec()} - Bencode::Bytes(b) => {[format!("{}:", b.0.len()).as_bytes(), &b.0[..]].concat()} - Bencode::List(l) => { - let mut result = vec!['l' as u8]; - for item in l { - result.extend(item.compose()) - } - result.push('e' as u8); - result - } - Bencode::Dict(d) => { - let mut result = vec!['d' as u8]; - let mut sorted_keys = d.keys().collect::<Vec<_>>(); - sorted_keys.sort(); - - for k in sorted_keys { - result.extend(Self::compose(&Bencode::Bytes(k.clone()))); - let v = &d[k]; - result.extend(v.compose()); - } - result.push('e' as u8); - result - } - } - } - pub fn parse(input: &[u8]) -> Result<Bencode> { - let (result, end_pos) = Self::parse_type(input)?; - ensure!(end_pos == input.len() - 1, - "Could not fully decode input. 
Got {} chars left to decode", input.len() - 1 - end_pos); - Ok(result) - } - - pub fn sha1(&self) -> [u8; 20] { - let mut hasher = Sha1::new(); - - let composed = self.compose(); - hasher.update(composed); - - hasher.finalize().into() - } - - fn parse_type(input: &[u8]) -> Result<(Bencode, usize)>{ - if input.len() == 0 { - return Err(anyhow!("Empty string is not valid bencode")) - } - let char = input[0] as char; - let type_ = match char { - 'i' => Ok(BencodeType::Integer), - 'l' => Ok(BencodeType::List), - 'd' => Ok(BencodeType::Dict), - '0'..='9' => Ok(BencodeType::Bytes), - _ => Err(anyhow!("Invalid bencoding start char: {char}")), - }?; - let end_to_decode = match type_ { - BencodeType::List | BencodeType::Dict => { - input.len() - }, - BencodeType::Integer | BencodeType::Bytes => { - let end_char = match type_ { - BencodeType::Integer => Ok('e'), - BencodeType::Bytes => Ok(':'), - _ => Err(anyhow!("Should be logically impossible")) - }?; - let result = input.iter().position(|&x| x == end_char as u8).ok_or_else(|| anyhow!("Could not find terminating character {end_char}"))?; - result - } - }; - let (to_decode, end_decoded) = match type_ { - BencodeType::Integer => (&input[1..end_to_decode], end_to_decode), - BencodeType::Bytes => { - let bytes_len = Self::parse_int_only(&input[0..end_to_decode])? 
as usize; - let bytes_start= end_to_decode + 1; - let end_pos = bytes_start + bytes_len; - (&input[bytes_start..end_pos], end_pos - 1) - }, - BencodeType::List | BencodeType::Dict => (&input[1..end_to_decode], 0), - }; - - match type_ { - BencodeType::Integer => { - let result = Self::parse_int(to_decode)?; - Ok((result, end_decoded)) - }, - BencodeType::Bytes => { - let result = Self::parse_bytes(to_decode)?; - Ok((result, end_decoded)) - }, - BencodeType::List => { - let (result, end_pos) = Self::parse_list(to_decode)?; - Ok((result, end_pos + 1)) - }, - BencodeType::Dict => { - let (result, end_pos) = Self::parse_dict(to_decode)?; - Ok((result, end_pos + 1)) - }, - } - } - - fn parse_int_only(input: &[u8]) -> Result<i64> { - let int_str = std::str::from_utf8(input)?; - int_str.parse::<i64>().map_err(anyhow::Error::msg) - } - - fn parse_int(input: &[u8]) -> Result<Bencode> { - let int = Self::parse_int_only(input)?; - Ok(Bencode::Integer(int)) - } - - fn parse_bytes(input: &[u8]) -> Result<Bencode> { - Ok(Bencode::Bytes(ByteString::from_slice(input))) - } - - fn parse_list(input: &[u8]) -> Result<(Bencode, usize)> { - let mut result = Vec::new(); - let mut decoded_pos = 0; - loop { - if decoded_pos >= input.len() { - return Err(anyhow!("Unfinished list. Could not find terminating character 'e'")) - } - if input[decoded_pos] == 'e' as u8 { - break; - } - let (li_result, end_pos ) = Self::parse_type(&input[decoded_pos..])?; - result.push(li_result); - decoded_pos += end_pos + 1; - } - Ok((Bencode::List(result), decoded_pos)) - } - - fn parse_dict(input: &[u8]) -> Result<(Bencode, usize)> { - let mut result = HashMap::new(); - let mut decoded_pos = 0; - loop { - if decoded_pos >= input.len() { - return Err(anyhow!("Unfinished dict. Could not find terminating character 'e'")) - } - if input[decoded_pos] == 'e' as u8 { - break; - } - let (Bencode::Bytes(key_result), end_pos) = Self::parse_type(&input[decoded_pos..])? 
else { - return Err(anyhow!("Type of dictionary key not Bytes")) - }; - decoded_pos += end_pos + 1; - let (value_result, end_pos) = Self::parse_type(&input[decoded_pos..])?; - result.insert(key_result, value_result); - decoded_pos += end_pos + 1; - } - Ok((Bencode::Dict(result), decoded_pos)) - } -} - -#[cfg(test)] -mod tests { - use hex_literal::hex; - use super::*; - - #[test] - fn test_sha1() { - assert_eq!(Bencode::Integer(42).sha1(), hex!("3ce69356df4222111c27b41cccf2164e6cced799")); - assert_eq!(Bencode::Bytes(ByteString::from_str("foo")).sha1(), hex!("a8f4559a9623f25c4d5f1155f31a2604c55e1334")); - } - - #[test] - fn test_compose_int() { - assert_eq!(Bencode::Integer(42).compose(), "i42e".as_bytes()); - assert_eq!(Bencode::Integer(17).compose(), "i17e".as_bytes()); - } - - #[test] - fn test_compose_bytes() { - assert_eq!(Bencode::Bytes(ByteString::from_str("foo")).compose(), "3:foo".as_bytes()); - assert_eq!(Bencode::Bytes(ByteString::from_str("bar")).compose(), "3:bar".as_bytes()); - } - - #[test] - fn test_compose_list() { - assert_eq!(Bencode::List(vec![Bencode::Bytes(ByteString::from_str("foo")), Bencode::Integer(42)]).compose(), "l3:fooi42ee".as_bytes()); - } - - #[test] - fn test_compose_dict() { - assert_eq!(Bencode::Dict(HashMap::from([(ByteString::from_str("foo"), Bencode::Integer(42))])).compose(), "d3:fooi42ee".as_bytes()); - } - - #[test] - fn test_integer_only() { - assert_eq!(Bencode::parse_int_only("42".as_bytes()).unwrap(), 42); - assert_eq!(Bencode::parse_int_only("17".as_bytes()).unwrap(), 17); - assert_eq!(Bencode::parse_int_only("-17".as_bytes()).unwrap(), -17); - } - - #[test] - fn test_integer() { - assert_eq!(Bencode::parse_int("42".as_bytes()).unwrap(), Bencode::Integer(42)); - assert_eq!(Bencode::parse_int("17".as_bytes()).unwrap(), Bencode::Integer(17)); - assert_eq!(Bencode::parse_int("-17".as_bytes()).unwrap(), Bencode::Integer(-17)); - } - - #[test] - fn test_integer_str() { - assert_eq!(Bencode::parse("i42e".as_bytes()).unwrap(), 
Bencode::Integer(42)); - assert_eq!(Bencode::parse("i17e".as_bytes()).unwrap(), Bencode::Integer(17)); - assert_eq!(Bencode::parse("i-17e".as_bytes()).unwrap(), Bencode::Integer(-17)); - } - - #[test] - fn test_bytes() { - assert_eq!(Bencode::parse_bytes("hallo".as_bytes()).unwrap(), Bencode::Bytes(ByteString::from_str("hallo"))); - assert_eq!(Bencode::parse_bytes("tschüss".as_bytes()).unwrap(), Bencode::Bytes(ByteString::from_str("tschüss"))); - assert_eq!(Bencode::parse_bytes("💩".as_bytes()).unwrap(), Bencode::Bytes(ByteString::from_str("💩"))); - } - - fn util_compose_bytes(input: &str) -> String { - format!("{}:{input}", input.len()) - } - - #[test] - fn test_bytes_str() { - assert_eq!(Bencode::parse(util_compose_bytes("hallo").as_bytes()).unwrap(), Bencode::Bytes(ByteString::from_str("hallo"))); - assert_eq!(Bencode::parse(util_compose_bytes("tschüss").as_bytes()).unwrap(), Bencode::Bytes(ByteString::from_str("tschüss"))); - assert_eq!(Bencode::parse(util_compose_bytes("💩").as_bytes()).unwrap(), Bencode::Bytes(ByteString::from_str("💩"))); - assert_eq!(Bencode::parse(util_compose_bytes("hallo 💩, this is a long text").as_bytes()).unwrap(), Bencode::Bytes(ByteString::from_str("hallo 💩, this is a long text"))); - } - - #[test] - fn test_list() { - assert_eq!(Bencode::parse_list( - ("e").as_bytes()).unwrap(), - (Bencode::List(Vec::new()), 0) - ); - let str = "i42ee"; - assert_eq!(Bencode::parse_list( - str.as_bytes()).unwrap(), - ( - Bencode::List(vec![ - Bencode::Integer(42), - ]), - str.len() - 1, - ) - ); - let str = format!("{}e", util_compose_bytes("hallo")); - assert_eq!(Bencode::parse_list( - str.as_bytes()).unwrap(), - ( - Bencode::List(vec![ - Bencode::Bytes(ByteString::from_str("hallo")), - ]), - str.len() - 1, - ) - ); - let str = format!("{}{}e", util_compose_bytes("hallo"), "i42e"); - assert_eq!(Bencode::parse_list( - str.as_bytes()).unwrap(), - ( - Bencode::List(vec![ - Bencode::Bytes(ByteString::from_str("hallo")), - Bencode::Integer(42), - ]), - 
str.len() - 1, - ) - ); - let str = format!("{}{}{}{}e", "i-17e", util_compose_bytes("hallo"), "i42e", util_compose_bytes("tschüssi💩")); - assert_eq!(Bencode::parse_list( - str.as_bytes()).unwrap(), - ( - Bencode::List(vec![ - Bencode::Integer(-17), - Bencode::Bytes(ByteString::from_str("hallo")), - Bencode::Integer(42), - Bencode::Bytes(ByteString::from_str("tschüssi💩")), - ]), - str.len() - 1, - ) - ); - } - - #[test] - fn test_multi_list() { - assert_eq!(Bencode::parse_list( - ("lelee").as_bytes()).unwrap(), - (Bencode::List(vec![ - Bencode::List(Vec::new()), - Bencode::List(Vec::new()), - ]), - 4) - ); - let str = format!("l{}{}ee", util_compose_bytes("hallo"), "i42e"); - assert_eq!(Bencode::parse_list( - str.as_bytes()).unwrap(), - (Bencode::List(vec![ - Bencode::List(vec![ - Bencode::Bytes(ByteString::from_str("hallo")), - Bencode::Integer(42), - ]), - ]), - str.len() - 1) - ); - let str = format!("l{}{}el{}{}{}ee", util_compose_bytes("hallo"), "i42e", "i17e", util_compose_bytes("tschüss💩"), "i33e"); - assert_eq!(Bencode::parse_list( - str.as_bytes()).unwrap(), - (Bencode::List(vec![ - Bencode::List(vec![ - Bencode::Bytes(ByteString::from_str("hallo")), - Bencode::Integer(42), - ]), - Bencode::List(vec![ - Bencode::Integer(17), - Bencode::Bytes(ByteString::from_str("tschüss💩")), - Bencode::Integer(33), - ]), - ]), - str.len() - 1) - ); - } - - #[test] - fn test_list_str() { - assert_eq!(Bencode::parse( - "le".as_bytes()).unwrap(), - Bencode::List(Vec::new()) - ); - assert_eq!(Bencode::parse( - "li42ee".as_bytes()).unwrap(), - Bencode::List(vec![ - Bencode::Integer(42), - ]) - ); - assert_eq!(Bencode::parse( - (format!("l{}e", util_compose_bytes("hallo"))).as_bytes()).unwrap(), - Bencode::List(vec![ - Bencode::Bytes(ByteString::from_str("hallo")), - ]) - ); - assert_eq!(Bencode::parse( - (format!("l{}{}e", util_compose_bytes("hallo"), "i42e")).as_bytes()).unwrap(), - Bencode::List(vec![ - Bencode::Bytes(ByteString::from_str("hallo")), - Bencode::Integer(42), - 
]) - ); - assert_eq!(Bencode::parse( - (format!("l{}{}{}{}e", "i-17e", util_compose_bytes("hallo"), "i42e", util_compose_bytes("tschüssi💩"))).as_bytes()).unwrap(), - Bencode::List(vec![ - Bencode::Integer(-17), - Bencode::Bytes(ByteString::from_str("hallo")), - Bencode::Integer(42), - Bencode::Bytes(ByteString::from_str("tschüssi💩")), - ]) - ); - assert_eq!(Bencode::parse("llelee".as_bytes()).unwrap(), - Bencode::List(vec![ - Bencode::List(Vec::new()), - Bencode::List(Vec::new()), - ]), - ); - let str = format!("ll{}{}ee", util_compose_bytes("hallo"), "i42e"); - assert_eq!(Bencode::parse(str.as_bytes()).unwrap(), - Bencode::List(vec![ - Bencode::List(vec![ - Bencode::Bytes(ByteString::from_str("hallo")), - Bencode::Integer(42), - ]), - ]), - ); - let str = format!("ll{}{}el{}{}{}ee", util_compose_bytes("hallo"), "i42e", "i17e", util_compose_bytes("tschüss💩"), "i33e"); - assert_eq!(Bencode::parse(str.as_bytes()).unwrap(), - Bencode::List(vec![ - Bencode::List(vec![ - Bencode::Bytes(ByteString::from_str("hallo")), - Bencode::Integer(42), - ]), - Bencode::List(vec![ - Bencode::Integer(17), - Bencode::Bytes(ByteString::from_str("tschüss💩")), - Bencode::Integer(33), - ]), - ]), - ); - let str = format!("ll{}{}ed{}{}{}{}ee", util_compose_bytes("hallo"), "i42e", util_compose_bytes("foo"), "i23e", util_compose_bytes("bar"), util_compose_bytes("baz")); - assert_eq!(Bencode::parse(str.as_bytes()).unwrap(), - Bencode::List(vec![ - Bencode::List(vec![ - Bencode::Bytes(ByteString::from_str("hallo")), - Bencode::Integer(42), - ]), - Bencode::Dict(HashMap::from([ - (ByteString::from_str("foo"), Bencode::Integer(23)), - (ByteString::from_str("bar"), Bencode::Bytes(ByteString::from_str("baz"))), - ])), - ]), - ); - } - #[test] - fn test_dict() { - assert_eq!(Bencode::parse_dict( - ("e").as_bytes()).unwrap(), - (Bencode::Dict(HashMap::new()), 0) - ); - let str = "3:fooi42ee"; - assert_eq!(Bencode::parse_dict( - str.as_bytes()).unwrap(), - (Bencode::Dict(HashMap::from([ - 
(ByteString::from_str("foo"), Bencode::Integer(42)), - ])), str.len() - 1) - ); - let str = "3:foo3:bare"; - assert_eq!(Bencode::parse_dict( - str.as_bytes()).unwrap(), - (Bencode::Dict(HashMap::from([ - (ByteString::from_str("foo"), Bencode::Bytes(ByteString::from_str("bar"))), - ])), str.len() - 1) - ); - let str = "3:fooi42e3:bar3:baze"; - assert_eq!(Bencode::parse_dict( - str.as_bytes()).unwrap(), - (Bencode::Dict(HashMap::from([ - (ByteString::from_str("foo"), Bencode::Integer(42)), - (ByteString::from_str("bar"), Bencode::Bytes(ByteString::from_str("baz"))), - ])), str.len() - 1) - ); - let str = "3:fooli42ei17ee3:bar3:baze"; - assert_eq!(Bencode::parse_dict( - str.as_bytes()).unwrap(), - (Bencode::Dict(HashMap::from([ - (ByteString::from_str("foo"), Bencode::List( - vec![Bencode::Integer(42), Bencode::Integer(17)])), - (ByteString::from_str("bar"), Bencode::Bytes(ByteString::from_str("baz"))), - ])), str.len() - 1) - ); - } - - #[test] - fn test_dict_str() { - assert_eq!(Bencode::parse("de".as_bytes()).unwrap(), - Bencode::Dict(HashMap::new()) - ); - let str = "d3:fooi42ee"; - assert_eq!(Bencode::parse(str.as_bytes()).unwrap(), - Bencode::Dict(HashMap::from([ - (ByteString::from_str("foo"), Bencode::Integer(42)), - ])) - ); - let str = format!("d{}i42ee", util_compose_bytes("💩")); - assert_eq!(Bencode::parse(str.as_bytes()).unwrap(), - Bencode::Dict(HashMap::from([ - (ByteString::from_str("💩"), Bencode::Integer(42)), - ])) - ); - let str = "d3:foo3:bare"; - assert_eq!(Bencode::parse(str.as_bytes()).unwrap(), - Bencode::Dict(HashMap::from([ - (ByteString::from_str("foo"), Bencode::Bytes(ByteString::from_str("bar"))), - ])) - ); - let str = "d3:fooi42e3:bar3:baze"; - assert_eq!(Bencode::parse(str.as_bytes()).unwrap(), - Bencode::Dict(HashMap::from([ - (ByteString::from_str("foo"), Bencode::Integer(42)), - (ByteString::from_str("bar"), Bencode::Bytes(ByteString::from_str("baz"))), - ])) - ); - let str = "d3:fooli42ei17ee3:bar3:baze"; - 
assert_eq!(Bencode::parse(str.as_bytes()).unwrap(), - Bencode::Dict(HashMap::from([ - (ByteString::from_str("foo"), Bencode::List( - vec![Bencode::Integer(42), Bencode::Integer(17)])), - (ByteString::from_str("bar"), Bencode::Bytes(ByteString::from_str("baz"))), - ])) - ); - let str = "d3:foo3:bare"; - assert_eq!(Bencode::parse(str.as_bytes()).unwrap(), - Bencode::Dict(HashMap::from([ - (ByteString::from_str("foo"), Bencode::Bytes(ByteString::from_str("bar"))), - ])) - ); - } - -} +pub mod custom; +pub mod de; diff --git a/src/bencode/custom.rs b/src/bencode/custom.rs new file mode 100644 index 0000000..3ec0578 --- /dev/null +++ b/src/bencode/custom.rs @@ -0,0 +1,559 @@ +use std::collections::HashMap; +use std::fmt::{Display, Formatter}; + +use anyhow::{anyhow, ensure, Result}; +use sha1::{Digest, Sha1}; + + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)] +pub struct ByteString(Vec<u8>); + +impl Display for ByteString { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + if let Ok(str) = std::str::from_utf8(&*self.0) { + write!(f, "{}", str) + } else { + let str_ints: Vec<_> = self.0.iter().map(|x| x.to_string()).collect(); + write!(f, "{{{}}}", str_ints.join(", ")) + } + } +} + +impl ByteString { + + pub fn to_vec(&self) -> &Vec<u8> { + &self.0 + } + pub fn to_string(&self) -> Result<String> { + let result = std::str::from_utf8(&self.0[..])?; + Ok(result.to_string()) + } + #[allow(dead_code)] + pub fn from_str(input: &str) -> Self { + ByteString(input.as_bytes().to_vec()) + } + + pub fn from_slice(input: &[u8]) -> Self { + ByteString(input.to_vec()) + } +} + +#[derive(Debug, PartialEq)] +enum BencodeType { + Integer, + Bytes, + List, + Dict, +} + +#[derive(Debug, PartialEq)] +pub enum Bencode { + Integer(i64), + Bytes(ByteString), + List(Vec<Bencode>), + Dict(HashMap<ByteString, Bencode>), +} + +impl Display for Bencode { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + Bencode::Integer(i) => { write!(f, "{{i: {}}}", i) } + Bencode::Bytes(b) => { 
write!(f, "{{b: {}}}", b) } + Bencode::List(l) => { + let strs: Vec<_> = l.iter().map(|x| x.to_string()).collect(); + write!(f, "{{l: [\n{}\n]}}", strs.join(",\n")) + } + Bencode::Dict(d) => { + write!(f, "{{d: {{\n")?; + for (k, v) in d { + write!(f, " {k} : {v}\n")?; + } + write!(f, "\n}} }}") + } + } + + } +} + +impl Bencode { + + pub fn compose(&self) -> Vec<u8> { + match self { + Bencode::Integer(i) => {format!("i{}e", i).as_bytes().to_vec()} + Bencode::Bytes(b) => {[format!("{}:", b.0.len()).as_bytes(), &b.0[..]].concat()} + Bencode::List(l) => { + let mut result = vec!['l' as u8]; + for item in l { + result.extend(item.compose()) + } + result.push('e' as u8); + result + } + Bencode::Dict(d) => { + let mut result = vec!['d' as u8]; + let mut sorted_keys = d.keys().collect::<Vec<_>>(); + sorted_keys.sort(); + + for k in sorted_keys { + result.extend(Self::compose(&Bencode::Bytes(k.clone()))); + let v = &d[k]; + result.extend(v.compose()); + } + result.push('e' as u8); + result + } + } + } + pub fn parse(input: &[u8]) -> Result<Bencode> { + let (result, end_pos) = Self::parse_type(input)?; + ensure!(end_pos == input.len() - 1, + "Could not fully decode input. 
Got {} chars left to decode", input.len() - 1 - end_pos); + Ok(result) + } + + pub fn sha1(&self) -> [u8; 20] { + let mut hasher = Sha1::new(); + + let composed = self.compose(); + hasher.update(composed); + + hasher.finalize().into() + } + + fn parse_type(input: &[u8]) -> Result<(Bencode, usize)>{ + if input.len() == 0 { + return Err(anyhow!("Empty string is not valid bencode")) + } + let char = input[0] as char; + let type_ = match char { + 'i' => Ok(BencodeType::Integer), + 'l' => Ok(BencodeType::List), + 'd' => Ok(BencodeType::Dict), + '0'..='9' => Ok(BencodeType::Bytes), + _ => Err(anyhow!("Invalid bencoding start char: {char}")), + }?; + let end_to_decode = match type_ { + BencodeType::List | BencodeType::Dict => { + input.len() + }, + BencodeType::Integer | BencodeType::Bytes => { + let end_char = match type_ { + BencodeType::Integer => Ok('e'), + BencodeType::Bytes => Ok(':'), + _ => Err(anyhow!("Should be logically impossible")) + }?; + let result = input.iter().position(|&x| x == end_char as u8).ok_or_else(|| anyhow!("Could not find terminating character {end_char}"))?; + result + } + }; + let (to_decode, end_decoded) = match type_ { + BencodeType::Integer => (&input[1..end_to_decode], end_to_decode), + BencodeType::Bytes => { + let bytes_len = Self::parse_int_only(&input[0..end_to_decode])? 
as usize; + let bytes_start= end_to_decode + 1; + let end_pos = bytes_start + bytes_len; + (&input[bytes_start..end_pos], end_pos - 1) + }, + BencodeType::List | BencodeType::Dict => (&input[1..end_to_decode], 0), + }; + + match type_ { + BencodeType::Integer => { + let result = Self::parse_int(to_decode)?; + Ok((result, end_decoded)) + }, + BencodeType::Bytes => { + let result = Self::parse_bytes(to_decode)?; + Ok((result, end_decoded)) + }, + BencodeType::List => { + let (result, end_pos) = Self::parse_list(to_decode)?; + Ok((result, end_pos + 1)) + }, + BencodeType::Dict => { + let (result, end_pos) = Self::parse_dict(to_decode)?; + Ok((result, end_pos + 1)) + }, + } + } + + fn parse_int_only(input: &[u8]) -> Result<i64> { + let int_str = std::str::from_utf8(input)?; + int_str.parse::<i64>().map_err(anyhow::Error::msg) + } + + fn parse_int(input: &[u8]) -> Result<Bencode> { + let int = Self::parse_int_only(input)?; + Ok(Bencode::Integer(int)) + } + + fn parse_bytes(input: &[u8]) -> Result<Bencode> { + Ok(Bencode::Bytes(ByteString::from_slice(input))) + } + + fn parse_list(input: &[u8]) -> Result<(Bencode, usize)> { + let mut result = Vec::new(); + let mut decoded_pos = 0; + loop { + if decoded_pos >= input.len() { + return Err(anyhow!("Unfinished list. Could not find terminating character 'e'")) + } + if input[decoded_pos] == 'e' as u8 { + break; + } + let (li_result, end_pos ) = Self::parse_type(&input[decoded_pos..])?; + result.push(li_result); + decoded_pos += end_pos + 1; + } + Ok((Bencode::List(result), decoded_pos)) + } + + fn parse_dict(input: &[u8]) -> Result<(Bencode, usize)> { + let mut result = HashMap::new(); + let mut decoded_pos = 0; + loop { + if decoded_pos >= input.len() { + return Err(anyhow!("Unfinished dict. Could not find terminating character 'e'")) + } + if input[decoded_pos] == 'e' as u8 { + break; + } + let (Bencode::Bytes(key_result), end_pos) = Self::parse_type(&input[decoded_pos..])? 
else { + return Err(anyhow!("Type of dictionary key not Bytes")) + }; + decoded_pos += end_pos + 1; + let (value_result, end_pos) = Self::parse_type(&input[decoded_pos..])?; + result.insert(key_result, value_result); + decoded_pos += end_pos + 1; + } + Ok((Bencode::Dict(result), decoded_pos)) + } +} + +#[cfg(test)] +mod tests { + use hex_literal::hex; + use super::*; + + #[test] + fn test_sha1() { + assert_eq!(Bencode::Integer(42).sha1(), hex!("3ce69356df4222111c27b41cccf2164e6cced799")); + assert_eq!(Bencode::Bytes(ByteString::from_str("foo")).sha1(), hex!("a8f4559a9623f25c4d5f1155f31a2604c55e1334")); + } + + #[test] + fn test_compose_int() { + assert_eq!(Bencode::Integer(42).compose(), "i42e".as_bytes()); + assert_eq!(Bencode::Integer(17).compose(), "i17e".as_bytes()); + } + + #[test] + fn test_compose_bytes() { + assert_eq!(Bencode::Bytes(ByteString::from_str("foo")).compose(), "3:foo".as_bytes()); + assert_eq!(Bencode::Bytes(ByteString::from_str("bar")).compose(), "3:bar".as_bytes()); + } + + #[test] + fn test_compose_list() { + assert_eq!(Bencode::List(vec![Bencode::Bytes(ByteString::from_str("foo")), Bencode::Integer(42)]).compose(), "l3:fooi42ee".as_bytes()); + } + + #[test] + fn test_compose_dict() { + assert_eq!(Bencode::Dict(HashMap::from([(ByteString::from_str("foo"), Bencode::Integer(42))])).compose(), "d3:fooi42ee".as_bytes()); + } + + #[test] + fn test_integer_only() { + assert_eq!(Bencode::parse_int_only("42".as_bytes()).unwrap(), 42); + assert_eq!(Bencode::parse_int_only("17".as_bytes()).unwrap(), 17); + assert_eq!(Bencode::parse_int_only("-17".as_bytes()).unwrap(), -17); + } + + #[test] + fn test_integer() { + assert_eq!(Bencode::parse_int("42".as_bytes()).unwrap(), Bencode::Integer(42)); + assert_eq!(Bencode::parse_int("17".as_bytes()).unwrap(), Bencode::Integer(17)); + assert_eq!(Bencode::parse_int("-17".as_bytes()).unwrap(), Bencode::Integer(-17)); + } + + #[test] + fn test_integer_str() { + assert_eq!(Bencode::parse("i42e".as_bytes()).unwrap(), 
Bencode::Integer(42)); + assert_eq!(Bencode::parse("i17e".as_bytes()).unwrap(), Bencode::Integer(17)); + assert_eq!(Bencode::parse("i-17e".as_bytes()).unwrap(), Bencode::Integer(-17)); + } + + #[test] + fn test_bytes() { + assert_eq!(Bencode::parse_bytes("hallo".as_bytes()).unwrap(), Bencode::Bytes(ByteString::from_str("hallo"))); + assert_eq!(Bencode::parse_bytes("tschüss".as_bytes()).unwrap(), Bencode::Bytes(ByteString::from_str("tschüss"))); + assert_eq!(Bencode::parse_bytes("💩".as_bytes()).unwrap(), Bencode::Bytes(ByteString::from_str("💩"))); + } + + fn util_compose_bytes(input: &str) -> String { + format!("{}:{input}", input.len()) + } + + #[test] + fn test_bytes_str() { + assert_eq!(Bencode::parse(util_compose_bytes("hallo").as_bytes()).unwrap(), Bencode::Bytes(ByteString::from_str("hallo"))); + assert_eq!(Bencode::parse(util_compose_bytes("tschüss").as_bytes()).unwrap(), Bencode::Bytes(ByteString::from_str("tschüss"))); + assert_eq!(Bencode::parse(util_compose_bytes("💩").as_bytes()).unwrap(), Bencode::Bytes(ByteString::from_str("💩"))); + assert_eq!(Bencode::parse(util_compose_bytes("hallo 💩, this is a long text").as_bytes()).unwrap(), Bencode::Bytes(ByteString::from_str("hallo 💩, this is a long text"))); + } + + #[test] + fn test_list() { + assert_eq!(Bencode::parse_list( + ("e").as_bytes()).unwrap(), + (Bencode::List(Vec::new()), 0) + ); + let str = "i42ee"; + assert_eq!(Bencode::parse_list( + str.as_bytes()).unwrap(), + ( + Bencode::List(vec![ + Bencode::Integer(42), + ]), + str.len() - 1, + ) + ); + let str = format!("{}e", util_compose_bytes("hallo")); + assert_eq!(Bencode::parse_list( + str.as_bytes()).unwrap(), + ( + Bencode::List(vec![ + Bencode::Bytes(ByteString::from_str("hallo")), + ]), + str.len() - 1, + ) + ); + let str = format!("{}{}e", util_compose_bytes("hallo"), "i42e"); + assert_eq!(Bencode::parse_list( + str.as_bytes()).unwrap(), + ( + Bencode::List(vec![ + Bencode::Bytes(ByteString::from_str("hallo")), + Bencode::Integer(42), + ]), + 
str.len() - 1, + ) + ); + let str = format!("{}{}{}{}e", "i-17e", util_compose_bytes("hallo"), "i42e", util_compose_bytes("tschüssi💩")); + assert_eq!(Bencode::parse_list( + str.as_bytes()).unwrap(), + ( + Bencode::List(vec![ + Bencode::Integer(-17), + Bencode::Bytes(ByteString::from_str("hallo")), + Bencode::Integer(42), + Bencode::Bytes(ByteString::from_str("tschüssi💩")), + ]), + str.len() - 1, + ) + ); + } + + #[test] + fn test_multi_list() { + assert_eq!(Bencode::parse_list( + ("lelee").as_bytes()).unwrap(), + (Bencode::List(vec![ + Bencode::List(Vec::new()), + Bencode::List(Vec::new()), + ]), + 4) + ); + let str = format!("l{}{}ee", util_compose_bytes("hallo"), "i42e"); + assert_eq!(Bencode::parse_list( + str.as_bytes()).unwrap(), + (Bencode::List(vec![ + Bencode::List(vec![ + Bencode::Bytes(ByteString::from_str("hallo")), + Bencode::Integer(42), + ]), + ]), + str.len() - 1) + ); + let str = format!("l{}{}el{}{}{}ee", util_compose_bytes("hallo"), "i42e", "i17e", util_compose_bytes("tschüss💩"), "i33e"); + assert_eq!(Bencode::parse_list( + str.as_bytes()).unwrap(), + (Bencode::List(vec![ + Bencode::List(vec![ + Bencode::Bytes(ByteString::from_str("hallo")), + Bencode::Integer(42), + ]), + Bencode::List(vec![ + Bencode::Integer(17), + Bencode::Bytes(ByteString::from_str("tschüss💩")), + Bencode::Integer(33), + ]), + ]), + str.len() - 1) + ); + } + + #[test] + fn test_list_str() { + assert_eq!(Bencode::parse( + "le".as_bytes()).unwrap(), + Bencode::List(Vec::new()) + ); + assert_eq!(Bencode::parse( + "li42ee".as_bytes()).unwrap(), + Bencode::List(vec![ + Bencode::Integer(42), + ]) + ); + assert_eq!(Bencode::parse( + (format!("l{}e", util_compose_bytes("hallo"))).as_bytes()).unwrap(), + Bencode::List(vec![ + Bencode::Bytes(ByteString::from_str("hallo")), + ]) + ); + assert_eq!(Bencode::parse( + (format!("l{}{}e", util_compose_bytes("hallo"), "i42e")).as_bytes()).unwrap(), + Bencode::List(vec![ + Bencode::Bytes(ByteString::from_str("hallo")), + Bencode::Integer(42), + 
]) + ); + assert_eq!(Bencode::parse( + (format!("l{}{}{}{}e", "i-17e", util_compose_bytes("hallo"), "i42e", util_compose_bytes("tschüssi💩"))).as_bytes()).unwrap(), + Bencode::List(vec![ + Bencode::Integer(-17), + Bencode::Bytes(ByteString::from_str("hallo")), + Bencode::Integer(42), + Bencode::Bytes(ByteString::from_str("tschüssi💩")), + ]) + ); + assert_eq!(Bencode::parse("llelee".as_bytes()).unwrap(), + Bencode::List(vec![ + Bencode::List(Vec::new()), + Bencode::List(Vec::new()), + ]), + ); + let str = format!("ll{}{}ee", util_compose_bytes("hallo"), "i42e"); + assert_eq!(Bencode::parse(str.as_bytes()).unwrap(), + Bencode::List(vec![ + Bencode::List(vec![ + Bencode::Bytes(ByteString::from_str("hallo")), + Bencode::Integer(42), + ]), + ]), + ); + let str = format!("ll{}{}el{}{}{}ee", util_compose_bytes("hallo"), "i42e", "i17e", util_compose_bytes("tschüss💩"), "i33e"); + assert_eq!(Bencode::parse(str.as_bytes()).unwrap(), + Bencode::List(vec![ + Bencode::List(vec![ + Bencode::Bytes(ByteString::from_str("hallo")), + Bencode::Integer(42), + ]), + Bencode::List(vec![ + Bencode::Integer(17), + Bencode::Bytes(ByteString::from_str("tschüss💩")), + Bencode::Integer(33), + ]), + ]), + ); + let str = format!("ll{}{}ed{}{}{}{}ee", util_compose_bytes("hallo"), "i42e", util_compose_bytes("foo"), "i23e", util_compose_bytes("bar"), util_compose_bytes("baz")); + assert_eq!(Bencode::parse(str.as_bytes()).unwrap(), + Bencode::List(vec![ + Bencode::List(vec![ + Bencode::Bytes(ByteString::from_str("hallo")), + Bencode::Integer(42), + ]), + Bencode::Dict(HashMap::from([ + (ByteString::from_str("foo"), Bencode::Integer(23)), + (ByteString::from_str("bar"), Bencode::Bytes(ByteString::from_str("baz"))), + ])), + ]), + ); + } + #[test] + fn test_dict() { + assert_eq!(Bencode::parse_dict( + ("e").as_bytes()).unwrap(), + (Bencode::Dict(HashMap::new()), 0) + ); + let str = "3:fooi42ee"; + assert_eq!(Bencode::parse_dict( + str.as_bytes()).unwrap(), + (Bencode::Dict(HashMap::from([ + 
(ByteString::from_str("foo"), Bencode::Integer(42)), + ])), str.len() - 1) + ); + let str = "3:foo3:bare"; + assert_eq!(Bencode::parse_dict( + str.as_bytes()).unwrap(), + (Bencode::Dict(HashMap::from([ + (ByteString::from_str("foo"), Bencode::Bytes(ByteString::from_str("bar"))), + ])), str.len() - 1) + ); + let str = "3:fooi42e3:bar3:baze"; + assert_eq!(Bencode::parse_dict( + str.as_bytes()).unwrap(), + (Bencode::Dict(HashMap::from([ + (ByteString::from_str("foo"), Bencode::Integer(42)), + (ByteString::from_str("bar"), Bencode::Bytes(ByteString::from_str("baz"))), + ])), str.len() - 1) + ); + let str = "3:fooli42ei17ee3:bar3:baze"; + assert_eq!(Bencode::parse_dict( + str.as_bytes()).unwrap(), + (Bencode::Dict(HashMap::from([ + (ByteString::from_str("foo"), Bencode::List( + vec![Bencode::Integer(42), Bencode::Integer(17)])), + (ByteString::from_str("bar"), Bencode::Bytes(ByteString::from_str("baz"))), + ])), str.len() - 1) + ); + } + + #[test] + fn test_dict_str() { + assert_eq!(Bencode::parse("de".as_bytes()).unwrap(), + Bencode::Dict(HashMap::new()) + ); + let str = "d3:fooi42ee"; + assert_eq!(Bencode::parse(str.as_bytes()).unwrap(), + Bencode::Dict(HashMap::from([ + (ByteString::from_str("foo"), Bencode::Integer(42)), + ])) + ); + let str = format!("d{}i42ee", util_compose_bytes("💩")); + assert_eq!(Bencode::parse(str.as_bytes()).unwrap(), + Bencode::Dict(HashMap::from([ + (ByteString::from_str("💩"), Bencode::Integer(42)), + ])) + ); + let str = "d3:foo3:bare"; + assert_eq!(Bencode::parse(str.as_bytes()).unwrap(), + Bencode::Dict(HashMap::from([ + (ByteString::from_str("foo"), Bencode::Bytes(ByteString::from_str("bar"))), + ])) + ); + let str = "d3:fooi42e3:bar3:baze"; + assert_eq!(Bencode::parse(str.as_bytes()).unwrap(), + Bencode::Dict(HashMap::from([ + (ByteString::from_str("foo"), Bencode::Integer(42)), + (ByteString::from_str("bar"), Bencode::Bytes(ByteString::from_str("baz"))), + ])) + ); + let str = "d3:fooli42ei17ee3:bar3:baze"; + 
assert_eq!(Bencode::parse(str.as_bytes()).unwrap(), + Bencode::Dict(HashMap::from([ + (ByteString::from_str("foo"), Bencode::List( + vec![Bencode::Integer(42), Bencode::Integer(17)])), + (ByteString::from_str("bar"), Bencode::Bytes(ByteString::from_str("baz"))), + ])) + ); + let str = "d3:foo3:bare"; + assert_eq!(Bencode::parse(str.as_bytes()).unwrap(), + Bencode::Dict(HashMap::from([ + (ByteString::from_str("foo"), Bencode::Bytes(ByteString::from_str("bar"))), + ])) + ); + } + +} diff --git a/src/serde_bencode.rs b/src/bencode/de.rs similarity index 99% rename from src/serde_bencode.rs rename to src/bencode/de.rs index 0894717..21cf60e 100644 --- a/src/serde_bencode.rs +++ b/src/bencode/de.rs @@ -506,7 +506,7 @@ mod test { use serde::Deserialize; - use crate::serde_bencode::{Error, from_bytes, from_str}; + use crate::bencode::de::{Error, from_bytes, from_str}; #[test] fn test_int() { diff --git a/src/main.rs b/src/main.rs index a7910e7..5450eec 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,10 +9,9 @@ use rand::prelude::*; use reqwest::blocking::Client; use reqwest::Url; -use crate::bencode::{Bencode, ByteString}; +use crate::bencode::custom::{Bencode, ByteString}; mod bencode; -mod serde_bencode; #[derive(Debug)] struct FileInfo {