first commit
This commit is contained in:
392
middleout/mosc.go
Normal file
392
middleout/mosc.go
Normal file
@ -0,0 +1,392 @@
|
||||
package middleout
|
||||
|
||||
import (
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
	"math"
	"os"
)
|
||||
|
||||
// debugPrintf writes a formatted debug message to stdout, but only
// when the environment variable DEBUG is set to "1"; otherwise it is
// a no-op.
func debugPrintf(format string, args ...interface{}) {
	if os.Getenv("DEBUG") != "1" {
		return
	}
	fmt.Printf(format, args...)
}
|
||||
|
||||
// MOSCCompressor handles Middle Out Spiral Compression (MOSC): input
// bytes are permuted along a logarithmic spiral, grouped into
// clusters, and encoded via fractal back-references and a
// probability-based codebook.
type MOSCCompressor struct {
	spiralFactor float64 // Controls spiral tightness (exponent of the spiral radius)
	clusterSize  int     // Maximum bytes per cluster
	maxFractal   int     // Maximum fractal recursion depth (0 disables fractal matching)
}
|
||||
|
||||
// NewMOSCCompressor initializes a new compressor
|
||||
func NewMOSCCompressor(spiralFactor float64, clusterSize, maxFractal int) *MOSCCompressor {
|
||||
if clusterSize < 4 {
|
||||
clusterSize = 4
|
||||
}
|
||||
return &MOSCCompressor{
|
||||
spiralFactor: spiralFactor,
|
||||
clusterSize: clusterSize,
|
||||
maxFractal: maxFractal,
|
||||
}
|
||||
}
|
||||
|
||||
// Compress compresses the input data using MOSC.
//
// Pipeline: (1) generate a spiral permutation of byte positions,
// (2) group the permuted bytes into clusters and compute a
// per-cluster entropy score ("affinity"), (3) detect self-similar
// clusters, (4) build a codebook from high-affinity clusters,
// (5) encode every cluster as a fractal reference, codebook
// reference, or raw bytes.
//
// Output layout: a 20-byte big-endian header (DataLen, ClusterCount,
// CodebookLen, SpiralFactor), followed by the serialized codebook
// entries, followed by the encoded cluster stream.
//
// Returns an error for empty input or if cluster encoding fails.
func (c *MOSCCompressor) Compress(data []byte) ([]byte, error) {
	if len(data) == 0 {
		return nil, fmt.Errorf("empty input")
	}

	// Generate spiral indices
	spiralIndices := c.generateSpiralIndices(len(data))

	// Cluster bytes based on spiral traversal
	clusters, affinities := c.formClusters(data, spiralIndices)

	// Debug: Log first 5 clusters and clusters 275-280
	for i := 0; i < len(clusters); i++ {
		if i < 5 || (i >= 275 && i <= 280) {
			debugPrintf("Compress cluster %d: %v\n", i, clusters[i])
		}
	}

	// Detect fractal patterns
	fractalMap := c.detectFractalPatterns(clusters)

	// Build probability-based codebook
	codebook := c.buildCodebook(affinities, clusters)

	// Encode clusters
	encoded, err := c.encodeClusters(clusters, fractalMap, codebook)
	if err != nil {
		return nil, err
	}

	// Combine output: header + codebook + encoded data
	var output bytes.Buffer
	header := struct {
		DataLen      uint32
		ClusterCount uint32
		CodebookLen  uint32
		SpiralFactor float64
	}{
		DataLen:      uint32(len(data)),
		ClusterCount: uint32(len(clusters)),
		CodebookLen:  uint32(len(codebook)),
		SpiralFactor: c.spiralFactor,
	}
	if err := binary.Write(&output, binary.BigEndian, &header); err != nil {
		return nil, err
	}

	// Write codebook entries as <code><len><seq>. Map iteration order
	// is random, but Decompress keys entries by code, so order does
	// not matter.
	// NOTE(review): byte(code) truncates codebook keys above 255 —
	// codebook construction must keep keys within one byte. Verify
	// against buildCodebook for inputs with more than 256 clusters.
	for code, seq := range codebook {
		output.WriteByte(byte(code))
		output.WriteByte(byte(len(seq)))
		output.Write(seq)
	}

	// Write encoded data
	output.Write(encoded)

	return output.Bytes(), nil
}
|
||||
|
||||
// generateSpiralIndices creates a spiral traversal order
|
||||
func (c *MOSCCompressor) generateSpiralIndices(length int) []int {
|
||||
result := make([]int, length)
|
||||
used := make(map[int]bool)
|
||||
indexCount := 0
|
||||
|
||||
// Generate spiral indices
|
||||
center := float64(length) / 2
|
||||
theta := 0.0
|
||||
for i := 0; i < length*2; i++ {
|
||||
radius := math.Exp(c.spiralFactor * theta)
|
||||
x := int(math.Round(center + radius*math.Cos(theta)))
|
||||
if x < 0 {
|
||||
x = 0
|
||||
}
|
||||
if x >= length {
|
||||
x = length - 1
|
||||
}
|
||||
if !used[x] {
|
||||
result[indexCount] = x
|
||||
used[x] = true
|
||||
indexCount++
|
||||
}
|
||||
theta += 0.1
|
||||
if indexCount >= length {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Fill remaining indices
|
||||
for i := 0; i < length; i++ {
|
||||
if !used[i] {
|
||||
result[indexCount] = i
|
||||
used[i] = true
|
||||
indexCount++
|
||||
}
|
||||
}
|
||||
|
||||
// Validate permutation
|
||||
count := make(map[int]int)
|
||||
for _, idx := range result {
|
||||
count[idx]++
|
||||
if idx < 0 || idx >= length {
|
||||
debugPrintf("Invalid index: %d\n", idx)
|
||||
}
|
||||
if count[idx] > 1 {
|
||||
debugPrintf("Duplicate index: %d\n", idx)
|
||||
}
|
||||
}
|
||||
if indexCount != length || len(count) != length {
|
||||
debugPrintf("Error: Spiral indices invalid: count %d, unique %d, want %d\n", indexCount, len(count), length)
|
||||
// Fallback to sequential
|
||||
for i := 0; i < length; i++ {
|
||||
result[i] = i
|
||||
}
|
||||
}
|
||||
|
||||
// Debug: Print first N indices
|
||||
logLen := length
|
||||
if logLen > 10 {
|
||||
logLen = 10
|
||||
}
|
||||
debugPrintf("Spiral indices (first %d): %v\n", logLen, result[:logLen])
|
||||
return result
|
||||
}
|
||||
|
||||
// formClusters groups bytes into clusters based on spiral proximity
|
||||
func (c *MOSCCompressor) formClusters(data []byte, indices []int) ([][]byte, []float64) {
|
||||
var clusters [][]byte
|
||||
var affinities []float64
|
||||
for i := 0; i < len(indices); i += c.clusterSize {
|
||||
end := i + c.clusterSize
|
||||
if end > len(indices) {
|
||||
end = len(indices)
|
||||
}
|
||||
cluster := make([]byte, 0, c.clusterSize)
|
||||
for j := i; j < end; j++ {
|
||||
cluster = append(cluster, data[indices[j]])
|
||||
}
|
||||
clusters = append(clusters, cluster)
|
||||
freq := make(map[byte]int)
|
||||
for _, b := range cluster {
|
||||
freq[b]++
|
||||
}
|
||||
affinity := 0.0
|
||||
for _, count := range freq {
|
||||
prob := float64(count) / float64(len(cluster))
|
||||
affinity -= prob * math.Log2(prob+1e-10)
|
||||
}
|
||||
affinities = append(affinities, affinity)
|
||||
}
|
||||
return clusters, affinities
|
||||
}
|
||||
|
||||
// detectFractalPatterns identifies self-similar patterns
|
||||
func (c *MOSCCompressor) detectFractalPatterns(clusters [][]byte) map[int][]int {
|
||||
fractalMap := make(map[int][]int)
|
||||
if c.maxFractal == 0 {
|
||||
return fractalMap
|
||||
}
|
||||
for depth := 1; depth <= c.maxFractal; depth++ {
|
||||
for i := 0; i < len(clusters); i++ {
|
||||
for j := 0; j < i; j++ {
|
||||
if c.isFractalSimilar(clusters[i], clusters[j], depth) {
|
||||
// Ensure reference cluster is valid
|
||||
if len(clusters[j]) == len(clusters[i]) {
|
||||
fractalMap[i] = append(fractalMap[i], j)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Debug: Log fractal references
|
||||
for i, refs := range fractalMap {
|
||||
if i < 5 || (i >= 275 && i <= 280) {
|
||||
debugPrintf("Fractal map cluster %d: refs=%v\n", i, refs)
|
||||
}
|
||||
}
|
||||
return fractalMap
|
||||
}
|
||||
|
||||
// isFractalSimilar checks if two clusters are similar at a given depth
|
||||
func (c *MOSCCompressor) isFractalSimilar(c1, c2 []byte, depth int) bool {
|
||||
if len(c1) != len(c2) {
|
||||
return false
|
||||
}
|
||||
if depth == 0 {
|
||||
return bytes.Equal(c1, c2)
|
||||
}
|
||||
mid := len(c1) / 2
|
||||
return c.isFractalSimilar(c1[:mid], c2[:mid], depth-1) &&
|
||||
c.isFractalSimilar(c1[mid:], c2[mid:], depth-1)
|
||||
}
|
||||
|
||||
// buildCodebook creates a probability-based codebook
|
||||
func (c *MOSCCompressor) buildCodebook(affinities []float64, clusters [][]byte) map[int][]byte {
|
||||
codebook := make(map[int][]byte)
|
||||
totalAffinity := 0.0
|
||||
for _, aff := range affinities {
|
||||
totalAffinity += math.Exp(aff)
|
||||
}
|
||||
for i, aff := range affinities {
|
||||
prob := math.Exp(aff) / totalAffinity
|
||||
if prob > 0.1 && i < len(clusters) { // Stricter threshold
|
||||
codebook[i] = clusters[i]
|
||||
}
|
||||
}
|
||||
return codebook
|
||||
}
|
||||
|
||||
// encodeClusters encodes clusters using the codebook and fractal map
|
||||
func (c *MOSCCompressor) encodeClusters(clusters [][]byte, fractalMap map[int][]int, codebook map[int][]byte) ([]byte, error) {
|
||||
var output bytes.Buffer
|
||||
rawCount, codebookCount, fractalCount := 0, 0, 0
|
||||
for i, cluster := range clusters {
|
||||
var encodingType string
|
||||
if refs, ok := fractalMap[i]; ok && len(refs) > 0 {
|
||||
output.WriteByte(0xFE)
|
||||
output.WriteByte(byte(refs[0]))
|
||||
encodingType = fmt.Sprintf("fractal ref=%d", refs[0])
|
||||
fractalCount++
|
||||
} else if _, ok := codebook[i]; ok {
|
||||
output.WriteByte(0xFF)
|
||||
output.WriteByte(byte(i))
|
||||
encodingType = "codebook"
|
||||
codebookCount++
|
||||
} else {
|
||||
output.WriteByte(0x00)
|
||||
output.WriteByte(byte(len(cluster)))
|
||||
output.Write(cluster)
|
||||
encodingType = "raw"
|
||||
rawCount++
|
||||
}
|
||||
// Debug: Log encoding type for clusters 0-4 and 275-280
|
||||
if i < 5 || (i >= 275 && i <= 280) {
|
||||
debugPrintf("Encode cluster %d: type=%s\n", i, encodingType)
|
||||
}
|
||||
}
|
||||
// Debug: Log encoding stats
|
||||
debugPrintf("Encoding stats: raw=%d, codebook=%d, fractal=%d\n", rawCount, codebookCount, fractalCount)
|
||||
return output.Bytes(), nil
|
||||
}
|
||||
|
||||
// Decompress decompresses the data
|
||||
func (c *MOSCCompressor) Decompress(compressed []byte) ([]byte, error) {
|
||||
if len(compressed) < 16 {
|
||||
return nil, fmt.Errorf("invalid compressed data")
|
||||
}
|
||||
|
||||
reader := bytes.NewReader(compressed)
|
||||
var header struct {
|
||||
DataLen uint32
|
||||
ClusterCount uint32
|
||||
CodebookLen uint32
|
||||
SpiralFactor float64
|
||||
}
|
||||
if err := binary.Read(reader, binary.BigEndian, &header); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
c.spiralFactor = header.SpiralFactor
|
||||
|
||||
codebook := make(map[int][]byte)
|
||||
for i := 0; i < int(header.CodebookLen); i++ {
|
||||
code, err := reader.ReadByte()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
length, err := reader.ReadByte()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
seq := make([]byte, length)
|
||||
if _, err := reader.Read(seq); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
codebook[int(code)] = seq
|
||||
}
|
||||
|
||||
clusters := make([][]byte, header.ClusterCount)
|
||||
for i := 0; i < int(header.ClusterCount); i++ {
|
||||
marker, err := reader.ReadByte()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
switch marker {
|
||||
case 0xFE:
|
||||
ref, err := reader.ReadByte()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if int(ref) >= i {
|
||||
return nil, fmt.Errorf("invalid fractal reference: %d", ref)
|
||||
}
|
||||
clusters[i] = clusters[ref]
|
||||
case 0xFF:
|
||||
code, err := reader.ReadByte()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if seq, ok := codebook[int(code)]; ok {
|
||||
clusters[i] = seq
|
||||
} else {
|
||||
return nil, fmt.Errorf("invalid codebook code: %d", code)
|
||||
}
|
||||
case 0x00:
|
||||
length, err := reader.ReadByte()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
cluster := make([]byte, length)
|
||||
if _, err := reader.Read(cluster); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
clusters[i] = cluster
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown marker: %x", marker)
|
||||
}
|
||||
// Debug: Log first 5 clusters and clusters 275-280
|
||||
if i < 5 || (i >= 275 && i <= 280) {
|
||||
debugPrintf("Decompress cluster %d: %v\n", i, clusters[i])
|
||||
}
|
||||
}
|
||||
|
||||
spiralIndices := c.generateSpiralIndices(int(header.DataLen))
|
||||
data := make([]byte, header.DataLen)
|
||||
clusterIdx := 0
|
||||
clusterPos := 0
|
||||
for i, idx := range spiralIndices {
|
||||
if clusterIdx >= len(clusters) {
|
||||
return nil, fmt.Errorf("insufficient clusters at index %d", i)
|
||||
}
|
||||
if clusterPos >= len(clusters[clusterIdx]) {
|
||||
clusterIdx++
|
||||
clusterPos = 0
|
||||
if clusterIdx >= len(clusters) {
|
||||
return nil, fmt.Errorf("insufficient clusters at index %d", i)
|
||||
}
|
||||
}
|
||||
if idx < 0 || idx >= len(data) {
|
||||
return nil, fmt.Errorf("invalid spiral index %d at position %d", idx, i)
|
||||
}
|
||||
data[idx] = clusters[clusterIdx][clusterPos]
|
||||
// Debug: Log positions 0-9 and 2212-2222
|
||||
if i < 10 || (i >= 2212 && i <= 2222) {
|
||||
debugPrintf("Position %d: idx=%d, clusterIdx=%d, clusterPos=%d, byte=%d\n", i, idx, clusterIdx, clusterPos, data[idx])
|
||||
}
|
||||
clusterPos++
|
||||
}
|
||||
logLen := len(data)
|
||||
if logLen > 100 {
|
||||
logLen = 100
|
||||
}
|
||||
debugPrintf("Decompressed first %d bytes: %v\n", logLen, data[:logLen])
|
||||
return data, nil
|
||||
}
|
96
middleout/mosc_test.go
Normal file
96
middleout/mosc_test.go
Normal file
@ -0,0 +1,96 @@
|
||||
package middleout
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestMOSC(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
data []byte
|
||||
spiralFactor float64
|
||||
clusterSize int
|
||||
maxFractal int
|
||||
}{
|
||||
{
|
||||
name: "Repetitive",
|
||||
data: []byte(strings.Repeat("abcd", 6)), // 24 bytes
|
||||
spiralFactor: 0.1,
|
||||
clusterSize: 8,
|
||||
maxFractal: 3,
|
||||
},
|
||||
{
|
||||
name: "Random",
|
||||
data: []byte("abcdefghijklmnopqrstuvwxyz"), // 26 bytes
|
||||
spiralFactor: 0.1,
|
||||
clusterSize: 8,
|
||||
maxFractal: 3,
|
||||
},
|
||||
{
|
||||
name: "Large",
|
||||
data: []byte(strings.Repeat("abc", 100)), // 300 bytes
|
||||
spiralFactor: 0.1,
|
||||
clusterSize: 8,
|
||||
maxFractal: 3,
|
||||
},
|
||||
{
|
||||
name: "Short",
|
||||
data: []byte("abc"), // 3 bytes
|
||||
spiralFactor: 0.1,
|
||||
clusterSize: 8,
|
||||
maxFractal: 3,
|
||||
},
|
||||
{
|
||||
name: "VeryLarge",
|
||||
data: []byte(strings.Repeat("abcd", 1000)), // 4000 bytes
|
||||
spiralFactor: 0.1,
|
||||
clusterSize: 8,
|
||||
maxFractal: 0, // Disable fractal to isolate issue
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
compressor := NewMOSCCompressor(tt.spiralFactor, tt.clusterSize, tt.maxFractal)
|
||||
compressed, err := compressor.Compress(tt.data)
|
||||
if err != nil {
|
||||
t.Fatalf("Compression error: %v", err)
|
||||
}
|
||||
t.Logf("Original size: %d bytes, Compressed size: %d bytes, Ratio: %.2f%%",
|
||||
len(tt.data), len(compressed), (float64(len(compressed))/float64(len(tt.data))*100))
|
||||
|
||||
decompressed, err := compressor.Decompress(compressed)
|
||||
if err != nil {
|
||||
t.Fatalf("Decompression error: %v", err)
|
||||
}
|
||||
if !bytes.Equal(decompressed, tt.data) {
|
||||
t.Errorf("Decompressed data does not match original")
|
||||
logLen := len(decompressed)
|
||||
if logLen > 100 {
|
||||
logLen = 100
|
||||
}
|
||||
t.Logf("Decompressed first %d bytes: %v", logLen, decompressed[:logLen])
|
||||
for i := 0; i < len(decompressed) && i < len(tt.data); i++ {
|
||||
if decompressed[i] != tt.data[i] {
|
||||
start := i - 5
|
||||
if start < 0 {
|
||||
start = 0
|
||||
}
|
||||
end := i + 5
|
||||
if end > len(decompressed) {
|
||||
end = len(decompressed)
|
||||
}
|
||||
if end > len(tt.data) {
|
||||
end = len(tt.data)
|
||||
}
|
||||
t.Errorf("First mismatch at position %d: got %d, want %d; surrounding got %v, want %v",
|
||||
i, decompressed[i], tt.data[i], decompressed[start:end], tt.data[start:end])
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user