package middleout

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
	"math"
	"os"
)

// debugPrintf prints debug messages only if DEBUG=1 is set
func debugPrintf(format string, args ...interface{}) {
	if os.Getenv("DEBUG") == "1" {
		fmt.Printf(format, args...)
	}
}
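
// Illustrative only (not from the original file): callers can enable the
// debug output above by setting the environment variable in the shell that
// runs the program or tests, for example `DEBUG=1 go test ./...` in a
// module that uses this package.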

// MOSCCompressor handles Middle Out Spiral Compression
type MOSCCompressor struct {
	spiralFactor float64 // Controls spiral tightness
	clusterSize  int     // Maximum bytes per cluster
	maxFractal   int     // Maximum fractal recursion depth
}

// NewMOSCCompressor initializes a new compressor. The cluster size is
// clamped to the range [4, 255]: cluster lengths are serialized as a
// single byte, so larger clusters could not be round-tripped.
func NewMOSCCompressor(spiralFactor float64, clusterSize, maxFractal int) *MOSCCompressor {
	if clusterSize < 4 {
		clusterSize = 4
	}
	if clusterSize > 255 {
		clusterSize = 255
	}
	return &MOSCCompressor{
		spiralFactor: spiralFactor,
		clusterSize:  clusterSize,
		maxFractal:   maxFractal,
	}
}
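
// roundTripSketch is an illustrative sketch added for documentation and is
// not part of the original API; the function name, parameters, and sample
// input are arbitrary. It shows the intended Compress/Decompress round trip.
func roundTripSketch() error {
	mosc := NewMOSCCompressor(0.1, 8, 2)
	input := []byte("middle out, middle out, middle out")
	compressed, err := mosc.Compress(input)
	if err != nil {
		return fmt.Errorf("compress: %w", err)
	}
	restored, err := mosc.Decompress(compressed)
	if err != nil {
		return fmt.Errorf("decompress: %w", err)
	}
	if !bytes.Equal(restored, input) {
		return fmt.Errorf("round trip mismatch")
	}
	return nil
}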

// Compress compresses the input data using MOSC
func (c *MOSCCompressor) Compress(data []byte) ([]byte, error) {
	if len(data) == 0 {
		return nil, fmt.Errorf("empty input")
	}

	// Generate spiral indices
	spiralIndices := c.generateSpiralIndices(len(data))

	// Cluster bytes based on spiral traversal
	clusters, affinities := c.formClusters(data, spiralIndices)

	// Debug: Log first 5 clusters and clusters 275-280
	for i := 0; i < len(clusters); i++ {
		if i < 5 || (i >= 275 && i <= 280) {
			debugPrintf("Compress cluster %d: %v\n", i, clusters[i])
		}
	}

	// Detect fractal patterns
	fractalMap := c.detectFractalPatterns(clusters)

	// Build probability-based codebook
	codebook := c.buildCodebook(affinities, clusters)

	// Encode clusters
	encoded, err := c.encodeClusters(clusters, fractalMap, codebook)
	if err != nil {
		return nil, err
	}

	// Combine output: header + codebook + encoded data
	var output bytes.Buffer
	header := struct {
		DataLen      uint32
		ClusterCount uint32
		CodebookLen  uint32
		SpiralFactor float64
	}{
		DataLen:      uint32(len(data)),
		ClusterCount: uint32(len(clusters)),
		CodebookLen:  uint32(len(codebook)),
		SpiralFactor: c.spiralFactor,
	}
	if err := binary.Write(&output, binary.BigEndian, &header); err != nil {
		return nil, err
	}

	// Write codebook
	for code, seq := range codebook {
		output.WriteByte(byte(code))
		output.WriteByte(byte(len(seq)))
		output.Write(seq)
	}

	// Write encoded data
	output.Write(encoded)

	return output.Bytes(), nil
}
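
// Serialized layout produced by Compress and consumed by Decompress
// (descriptive summary derived from the code in this file):
//
//	header   - DataLen, ClusterCount, CodebookLen (uint32) and
//	           SpiralFactor (float64), all big-endian (20 bytes)
//	codebook - CodebookLen entries of {code byte, length byte, sequence}
//	clusters - ClusterCount records, each starting with a marker byte
//	           (0xFE fractal reference, 0xFF codebook code, 0x00 raw)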

// generateSpiralIndices creates a spiral traversal order
func (c *MOSCCompressor) generateSpiralIndices(length int) []int {
	result := make([]int, length)
	used := make(map[int]bool)
	indexCount := 0

	// Generate spiral indices
	center := float64(length) / 2
	theta := 0.0
	for i := 0; i < length*2; i++ {
		radius := math.Exp(c.spiralFactor * theta)
		x := int(math.Round(center + radius*math.Cos(theta)))
		if x < 0 {
			x = 0
		}
		if x >= length {
			x = length - 1
		}
		if !used[x] {
			result[indexCount] = x
			used[x] = true
			indexCount++
		}
		theta += 0.1
		if indexCount >= length {
			break
		}
	}

	// Fill remaining indices
	for i := 0; i < length; i++ {
		if !used[i] {
			result[indexCount] = i
			used[i] = true
			indexCount++
		}
	}

	// Validate permutation
	count := make(map[int]int)
	for _, idx := range result {
		count[idx]++
		if idx < 0 || idx >= length {
			debugPrintf("Invalid index: %d\n", idx)
		}
		if count[idx] > 1 {
			debugPrintf("Duplicate index: %d\n", idx)
		}
	}
	if indexCount != length || len(count) != length {
		debugPrintf("Error: Spiral indices invalid: count %d, unique %d, want %d\n", indexCount, len(count), length)
		// Fallback to sequential
		for i := 0; i < length; i++ {
			result[i] = i
		}
	}

	// Debug: Print first N indices
	logLen := length
	if logLen > 10 {
		logLen = 10
	}
	debugPrintf("Spiral indices (first %d): %v\n", logLen, result[:logLen])
	return result
}
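
// Note on the traversal (descriptive, derived from generateSpiralIndices):
// positions are visited starting near the middle of the buffer and moving
// outward along x = round(length/2 + e^(spiralFactor*theta) * cos(theta)),
// with theta advancing in 0.1 steps and x clamped to [0, length). Positions
// the spiral never hits are appended in ascending order, and a result that
// is not a valid permutation falls back to the identity ordering.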

// formClusters groups bytes into clusters based on spiral proximity
func (c *MOSCCompressor) formClusters(data []byte, indices []int) ([][]byte, []float64) {
	var clusters [][]byte
	var affinities []float64
	for i := 0; i < len(indices); i += c.clusterSize {
		end := i + c.clusterSize
		if end > len(indices) {
			end = len(indices)
		}
		cluster := make([]byte, 0, c.clusterSize)
		for j := i; j < end; j++ {
			cluster = append(cluster, data[indices[j]])
		}
		clusters = append(clusters, cluster)
		freq := make(map[byte]int)
		for _, b := range cluster {
			freq[b]++
		}
		affinity := 0.0
		for _, count := range freq {
			prob := float64(count) / float64(len(cluster))
			affinity -= prob * math.Log2(prob+1e-10)
		}
		affinities = append(affinities, affinity)
	}
	return clusters, affinities
}
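
// Descriptive note: the affinity computed above is the Shannon entropy of
// the byte distribution within each cluster, in bits per byte; the 1e-10
// term is only a guard against taking the log of zero.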

// detectFractalPatterns identifies self-similar patterns
func (c *MOSCCompressor) detectFractalPatterns(clusters [][]byte) map[int][]int {
	fractalMap := make(map[int][]int)
	if c.maxFractal == 0 {
		return fractalMap
	}
	for depth := 1; depth <= c.maxFractal; depth++ {
		for i := 0; i < len(clusters); i++ {
			for j := 0; j < i; j++ {
				if c.isFractalSimilar(clusters[i], clusters[j], depth) {
					// Ensure reference cluster is valid
					if len(clusters[j]) == len(clusters[i]) {
						fractalMap[i] = append(fractalMap[i], j)
					}
				}
			}
		}
	}
	// Debug: Log fractal references
	for i, refs := range fractalMap {
		if i < 5 || (i >= 275 && i <= 280) {
			debugPrintf("Fractal map cluster %d: refs=%v\n", i, refs)
		}
	}
	return fractalMap
}
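
// Descriptive note: for each cluster i, fractalMap[i] collects earlier
// cluster indices j < i, scanned in ascending order once per depth, so the
// same j can be appended for several depths; the encoder only uses refs[0],
// the first match found at depth 1.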

// isFractalSimilar checks if two clusters are similar at a given depth
func (c *MOSCCompressor) isFractalSimilar(c1, c2 []byte, depth int) bool {
	if len(c1) != len(c2) {
		return false
	}
	if depth == 0 {
		return bytes.Equal(c1, c2)
	}
	mid := len(c1) / 2
	return c.isFractalSimilar(c1[:mid], c2[:mid], depth-1) &&
		c.isFractalSimilar(c1[mid:], c2[mid:], depth-1)
}
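
// Descriptive note: because depth 0 compares each half with bytes.Equal and
// the two halves together cover the whole slice, the recursion accepts the
// same pairs as a plain bytes.Equal at every depth; the depth parameter only
// changes how the comparison is subdivided.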

// buildCodebook creates a probability-based codebook
func (c *MOSCCompressor) buildCodebook(affinities []float64, clusters [][]byte) map[int][]byte {
	codebook := make(map[int][]byte)
	totalAffinity := 0.0
	for _, aff := range affinities {
		totalAffinity += math.Exp(aff)
	}
	for i, aff := range affinities {
		prob := math.Exp(aff) / totalAffinity
		// Stricter threshold; codebook codes are serialized as a single
		// byte, so only cluster indices below 256 are eligible.
		if prob > 0.1 && i < 256 && i < len(clusters) {
			codebook[i] = clusters[i]
		}
	}
	return codebook
}

// encodeClusters encodes clusters using the codebook and fractal map
func (c *MOSCCompressor) encodeClusters(clusters [][]byte, fractalMap map[int][]int, codebook map[int][]byte) ([]byte, error) {
	var output bytes.Buffer
	rawCount, codebookCount, fractalCount := 0, 0, 0
	for i, cluster := range clusters {
		var encodingType string
		// Fractal references are serialized as a single byte, so only
		// references to clusters below index 256 can be used here.
		if refs, ok := fractalMap[i]; ok && len(refs) > 0 && refs[0] < 256 {
			output.WriteByte(0xFE)
			output.WriteByte(byte(refs[0]))
			encodingType = fmt.Sprintf("fractal ref=%d", refs[0])
			fractalCount++
		} else if _, ok := codebook[i]; ok {
			output.WriteByte(0xFF)
			output.WriteByte(byte(i))
			encodingType = "codebook"
			codebookCount++
		} else {
			output.WriteByte(0x00)
			output.WriteByte(byte(len(cluster)))
			output.Write(cluster)
			encodingType = "raw"
			rawCount++
		}
		// Debug: Log encoding type for clusters 0-4 and 275-280
		if i < 5 || (i >= 275 && i <= 280) {
			debugPrintf("Encode cluster %d: type=%s\n", i, encodingType)
		}
	}
	// Debug: Log encoding stats
	debugPrintf("Encoding stats: raw=%d, codebook=%d, fractal=%d\n", rawCount, codebookCount, fractalCount)
	return output.Bytes(), nil
}

// Decompress decompresses the data
func (c *MOSCCompressor) Decompress(compressed []byte) ([]byte, error) {
	// Header is 3*uint32 + float64 = 20 bytes
	if len(compressed) < 20 {
		return nil, fmt.Errorf("invalid compressed data")
	}

	reader := bytes.NewReader(compressed)
	var header struct {
		DataLen      uint32
		ClusterCount uint32
		CodebookLen  uint32
		SpiralFactor float64
	}
	if err := binary.Read(reader, binary.BigEndian, &header); err != nil {
		return nil, err
	}
	c.spiralFactor = header.SpiralFactor

	codebook := make(map[int][]byte)
	for i := 0; i < int(header.CodebookLen); i++ {
		code, err := reader.ReadByte()
		if err != nil {
			return nil, err
		}
		length, err := reader.ReadByte()
		if err != nil {
			return nil, err
		}
		seq := make([]byte, length)
		if _, err := io.ReadFull(reader, seq); err != nil {
			return nil, err
		}
		codebook[int(code)] = seq
	}

	clusters := make([][]byte, header.ClusterCount)
	for i := 0; i < int(header.ClusterCount); i++ {
		marker, err := reader.ReadByte()
		if err != nil {
			return nil, err
		}
		switch marker {
		case 0xFE:
			ref, err := reader.ReadByte()
			if err != nil {
				return nil, err
			}
			if int(ref) >= i {
				return nil, fmt.Errorf("invalid fractal reference: %d", ref)
			}
			clusters[i] = clusters[ref]
		case 0xFF:
			code, err := reader.ReadByte()
			if err != nil {
				return nil, err
			}
			if seq, ok := codebook[int(code)]; ok {
				clusters[i] = seq
			} else {
				return nil, fmt.Errorf("invalid codebook code: %d", code)
			}
		case 0x00:
			length, err := reader.ReadByte()
			if err != nil {
				return nil, err
			}
			cluster := make([]byte, length)
			if _, err := io.ReadFull(reader, cluster); err != nil {
				return nil, err
			}
			clusters[i] = cluster
		default:
			return nil, fmt.Errorf("unknown marker: %x", marker)
		}
		// Debug: Log first 5 clusters and clusters 275-280
		if i < 5 || (i >= 275 && i <= 280) {
			debugPrintf("Decompress cluster %d: %v\n", i, clusters[i])
		}
	}

	// Rebuild the original byte order by replaying the spiral traversal
	spiralIndices := c.generateSpiralIndices(int(header.DataLen))
	data := make([]byte, header.DataLen)
	clusterIdx := 0
	clusterPos := 0
	for i, idx := range spiralIndices {
		if clusterIdx >= len(clusters) {
			return nil, fmt.Errorf("insufficient clusters at index %d", i)
		}
		if clusterPos >= len(clusters[clusterIdx]) {
			clusterIdx++
			clusterPos = 0
			if clusterIdx >= len(clusters) {
				return nil, fmt.Errorf("insufficient clusters at index %d", i)
			}
		}
		if idx < 0 || idx >= len(data) {
			return nil, fmt.Errorf("invalid spiral index %d at position %d", idx, i)
		}
		data[idx] = clusters[clusterIdx][clusterPos]
		// Debug: Log positions 0-9 and 2212-2222
		if i < 10 || (i >= 2212 && i <= 2222) {
			debugPrintf("Position %d: idx=%d, clusterIdx=%d, clusterPos=%d, byte=%d\n", i, idx, clusterIdx, clusterPos, data[idx])
		}
		clusterPos++
	}
	logLen := len(data)
	if logLen > 100 {
		logLen = 100
	}
	debugPrintf("Decompressed first %d bytes: %v\n", logLen, data[:logLen])
	return data, nil
}