mosc/middleout/mosc.go
2025-06-08 02:18:05 -04:00

392 lines
12 KiB
Go

package middleout
import (
"bytes"
"encoding/binary"
"fmt"
"math"
"os"
)
// debugPrintf writes a formatted diagnostic message to standard output, but
// only when the DEBUG environment variable is exactly "1". The variable is
// looked up on every call.
func debugPrintf(format string, args ...interface{}) {
	if os.Getenv("DEBUG") != "1" {
		return
	}
	fmt.Printf(format, args...)
}
// MOSCCompressor carries the tuning parameters used by Middle Out Spiral
// Compression (MOSC).
type MOSCCompressor struct {
	// spiralFactor controls how tightly the traversal spiral is wound.
	spiralFactor float64
	// clusterSize is the maximum number of bytes placed in one cluster.
	clusterSize int
	// maxFractal is the maximum fractal recursion depth.
	maxFractal int
}
// NewMOSCCompressor returns a compressor configured with the given spiral
// factor, cluster size and maximum fractal depth.
//
// clusterSize is clamped to the range [4, 255]. The upper bound exists
// because the serialized stream stores every cluster length in a single
// byte, so a larger cluster could not be decoded again.
func NewMOSCCompressor(spiralFactor float64, clusterSize, maxFractal int) *MOSCCompressor {
	const (
		minClusterSize = 4
		maxClusterSize = 255 // largest length representable in the one-byte length field
	)
	switch {
	case clusterSize < minClusterSize:
		clusterSize = minClusterSize
	case clusterSize > maxClusterSize:
		clusterSize = maxClusterSize
	}
	return &MOSCCompressor{
		spiralFactor: spiralFactor,
		clusterSize:  clusterSize,
		maxFractal:   maxFractal,
	}
}
// Compress encodes data with MOSC and returns the serialized stream.
//
// The stream is laid out as:
//
//	header   DataLen, ClusterCount, CodebookLen (uint32) and SpiralFactor
//	         (float64), all big-endian
//	codebook CodebookLen entries of: code byte, length byte, sequence bytes
//	body     the cluster stream produced by encodeClusters
//
// Codebook entries are written in ascending cluster-index order so that the
// same input always yields byte-identical output (ranging over the map
// directly would emit them in a random order).
//
// An error is returned when data is empty, when its length does not fit the
// 32-bit DataLen field, when the compressor has a non-positive cluster size
// (which would make clustering loop forever), when a codebook sequence is too
// long for its one-byte length field, or when encoding fails.
func (c *MOSCCompressor) Compress(data []byte) ([]byte, error) {
	if len(data) == 0 {
		return nil, fmt.Errorf("empty input")
	}
	if uint64(len(data)) > math.MaxUint32 {
		return nil, fmt.Errorf("input too large: %d bytes does not fit the 32-bit length field", len(data))
	}
	if c.clusterSize < 1 {
		return nil, fmt.Errorf("invalid cluster size: %d", c.clusterSize)
	}

	// Visit the input in spiral order and group the visited bytes.
	spiralIndices := c.generateSpiralIndices(len(data))
	clusters, affinities := c.formClusters(data, spiralIndices)

	// Debug: log clusters 0-4 and 275-280.
	for i, cluster := range clusters {
		if i > 280 {
			break
		}
		if i < 5 || i >= 275 {
			debugPrintf("Compress cluster %d: %v\n", i, cluster)
		}
	}

	fractalMap := c.detectFractalPatterns(clusters)
	codebook := c.buildCodebook(affinities, clusters)
	encoded, err := c.encodeClusters(clusters, fractalMap, codebook)
	if err != nil {
		return nil, err
	}

	// Collect the codebook keys in ascending order. Only keys that name an
	// existing cluster are kept; the encoder never references any other key.
	codes := make([]int, 0, len(codebook))
	for i := range clusters {
		if _, ok := codebook[i]; ok {
			codes = append(codes, i)
		}
	}

	var output bytes.Buffer
	header := struct {
		DataLen      uint32
		ClusterCount uint32
		CodebookLen  uint32
		SpiralFactor float64
	}{
		DataLen:      uint32(len(data)),
		ClusterCount: uint32(len(clusters)),
		CodebookLen:  uint32(len(codes)),
		SpiralFactor: c.spiralFactor,
	}
	if err := binary.Write(&output, binary.BigEndian, &header); err != nil {
		return nil, err
	}

	for _, code := range codes {
		seq := codebook[code]
		if len(seq) > math.MaxUint8 {
			return nil, fmt.Errorf("codebook entry %d is %d bytes; the format allows at most %d", code, len(seq), math.MaxUint8)
		}
		// The format stores the code and the sequence length in one byte each.
		output.WriteByte(byte(code))
		output.WriteByte(byte(len(seq)))
		output.Write(seq)
	}

	output.Write(encoded)
	return output.Bytes(), nil
}
// generateSpiralIndices returns a permutation of 0..length-1 that fixes the
// order in which input bytes are visited.
//
// Up to 2*length points are sampled along a spiral around length/2, with
// radius exp(spiralFactor*theta) and theta advancing by 0.1 per sample. Each
// point is rounded and clamped to a valid index, and an index is emitted the
// first time it is hit. Indices the spiral never reaches are appended in
// ascending order. Every emitted index is in range and emitted exactly once,
// so the result is always a permutation.
//
// Compress and Decompress must compute the same permutation, possibly on
// different machines, so two platform-dependent behaviours are pinned down:
//
//   - The radius quickly exceeds the int range or becomes infinite. Converting
//     such a float to int is implementation-dependent in Go, so clamping is
//     done on the float value. Positions that are NaN, negative, or at least
//     2^63 map to index 0; the last case keeps the result amd64 builds have
//     historically produced.
//   - The product radius*cos(theta) is rounded explicitly with float64(...),
//     so the compiler cannot merge it with the addition into a fused
//     multiply-add, which would round differently on targets that have one.
//
// A non-positive length yields an empty slice.
func (c *MOSCCompressor) generateSpiralIndices(length int) []int {
	if length <= 0 {
		return []int{}
	}

	const (
		thetaStep = 0.1
		intLimit  = 1 << 63 // first magnitude that no longer fits a 64-bit int
	)

	result := make([]int, length)
	used := make([]bool, length)
	filled := 0

	// Spiral phase.
	center := float64(length) / 2
	theta := 0.0
	for i := 0; i < length*2 && filled < length; i++ {
		radius := math.Exp(c.spiralFactor * theta)
		offset := float64(radius * math.Cos(theta)) // explicit rounding prevents FMA fusion
		pos := math.Round(center + offset)

		var x int
		switch {
		case math.IsNaN(pos) || pos < 0 || pos >= intLimit:
			x = 0
		case pos >= float64(length):
			x = length - 1
		default:
			x = int(pos)
		}

		if !used[x] {
			result[filled] = x
			used[x] = true
			filled++
		}
		theta += thetaStep
	}

	// Fill phase: append every index the spiral did not reach.
	for i := 0; i < length && filled < length; i++ {
		if !used[i] {
			result[filled] = i
			used[i] = true
			filled++
		}
	}

	// Debug: print the first few indices.
	logLen := length
	if logLen > 10 {
		logLen = 10
	}
	debugPrintf("Spiral indices (first %d): %v\n", logLen, result[:logLen])
	return result
}
// formClusters walks indices in order, gathers the referenced bytes of data
// into clusters of at most clusterSize bytes (the last cluster may be
// shorter), and computes an affinity for each cluster.
//
// The affinity is the Shannon entropy, in bits, of the byte distribution in
// the cluster, with 1e-10 added inside the logarithm. The terms are summed in
// order of each byte value's first occurrence in the cluster. Summing in map
// iteration order would make the floating-point result depend on a random
// order, so affinities could differ between runs.
//
// It returns the clusters and a parallel slice holding one affinity per
// cluster.
func (c *MOSCCompressor) formClusters(data []byte, indices []int) ([][]byte, []float64) {
	var (
		clusters   [][]byte
		affinities []float64
		// freq is a byte histogram reused across clusters: every entry that is
		// incremented below is reset to zero when its entropy term is added.
		freq [256]int
	)
	for start := 0; start < len(indices); start += c.clusterSize {
		end := start + c.clusterSize
		if end > len(indices) {
			end = len(indices)
		}

		cluster := make([]byte, 0, c.clusterSize)
		for _, idx := range indices[start:end] {
			b := data[idx]
			cluster = append(cluster, b)
			freq[b]++
		}
		clusters = append(clusters, cluster)

		affinity := 0.0
		size := float64(len(cluster))
		for _, b := range cluster {
			count := freq[b]
			if count == 0 {
				continue // this byte value was already accounted for
			}
			freq[b] = 0
			prob := float64(count) / size
			affinity -= prob * math.Log2(prob+1e-10)
		}
		affinities = append(affinities, affinity)
	}
	return clusters, affinities
}
// detectFractalPatterns maps each cluster index to the indices of earlier
// clusters that isFractalSimilar reports as similar to it.
//
// All (cluster, earlier cluster) pairs are scanned once per depth from 1 to
// maxFractal, and a match is appended on every pass, so one earlier index can
// appear several times in a list. A maxFractal of zero or less runs no pass
// and yields an empty map.
func (c *MOSCCompressor) detectFractalPatterns(clusters [][]byte) map[int][]int {
	matches := map[int][]int{}

	depth := 1
	for depth <= c.maxFractal {
		for cur, candidate := range clusters {
			for prev, earlier := range clusters[:cur] {
				if !c.isFractalSimilar(candidate, earlier, depth) {
					continue
				}
				// Only keep references to clusters of the same length.
				if len(earlier) != len(candidate) {
					continue
				}
				matches[cur] = append(matches[cur], prev)
			}
		}
		depth++
	}

	// Debug: log references for clusters 0-4 and 275-280.
	for cur, refs := range matches {
		interesting := cur < 5 || (cur >= 275 && cur <= 280)
		if interesting {
			debugPrintf("Fractal map cluster %d: refs=%v\n", cur, refs)
		}
	}
	return matches
}
// isFractalSimilar reports whether c1 and c2 match when both are split into
// halves recursively, up to depth levels, and the resulting pieces are
// compared byte for byte. Clusters of different lengths never match.
//
// Every level ends in an exact comparison of all pieces, so the result is
// true exactly when the two clusters are equal; depth only controls how
// finely the comparison is subdivided.
//
// Recursion stops once depth is used up or a piece has fewer than two bytes.
// Splitting shorter pieces cannot change the outcome but would cost 2^depth
// calls regardless of cluster length, and a negative depth would otherwise
// never terminate.
func (c *MOSCCompressor) isFractalSimilar(c1, c2 []byte, depth int) bool {
	if len(c1) != len(c2) {
		return false
	}
	if depth <= 0 || len(c1) < 2 {
		return bytes.Equal(c1, c2)
	}
	mid := len(c1) / 2
	return c.isFractalSimilar(c1[:mid], c2[:mid], depth-1) &&
		c.isFractalSimilar(c1[mid:], c2[mid:], depth-1)
}
// buildCodebook selects the clusters that are stored in the codebook and
// returns them keyed by cluster index.
//
// Each affinity is turned into a softmax weight, exp(aff) divided by the sum
// of exp over all affinities, and a cluster is selected when its weight
// exceeds 0.1.
//
// The serialized format writes both the codebook code (the cluster index) and
// the sequence length as single bytes. Clusters whose index or length exceeds
// 255 are therefore never selected: truncating an index would let two entries
// collide on the same code, and truncating a length would corrupt the entry.
func (c *MOSCCompressor) buildCodebook(affinities []float64, clusters [][]byte) map[int][]byte {
	const (
		minProbability = 0.1
		maxByteValue   = math.MaxUint8
	)
	codebook := make(map[int][]byte)

	// The normalizer covers every affinity, including clusters that cannot be
	// selected, so the weights of the remaining clusters are unchanged.
	total := 0.0
	for _, aff := range affinities {
		total += math.Exp(aff)
	}

	limit := len(affinities)
	if len(clusters) < limit {
		limit = len(clusters)
	}
	if limit > maxByteValue+1 {
		limit = maxByteValue + 1
	}
	for i := 0; i < limit; i++ {
		if len(clusters[i]) > maxByteValue {
			continue
		}
		if math.Exp(affinities[i])/total > minProbability {
			codebook[i] = clusters[i]
		}
	}
	return codebook
}
// encodeClusters serializes clusters into the body of the compressed stream.
// Each cluster is written in the first form below that applies:
//
//	0xFE ref        copy of the earlier cluster ref, taken from fractalMap
//	0xFF code       codebook entry whose code is the cluster's own index
//	0x00 len bytes  the raw cluster bytes
//
// ref, code and len each occupy exactly one byte. A fractal reference or a
// codebook index above 255 is therefore never emitted; the cluster falls
// through to the next form, because a truncated value would make the decoder
// pick a different cluster. The first entry of the reference list that fits
// in a byte is used.
//
// An error is returned when a cluster is longer than 255 bytes, since its
// length cannot be represented in the format.
func (c *MOSCCompressor) encodeClusters(clusters [][]byte, fractalMap map[int][]int, codebook map[int][]byte) ([]byte, error) {
	const maxByteValue = 0xFF

	var output bytes.Buffer
	rawCount, codebookCount, fractalCount := 0, 0, 0
	for i, cluster := range clusters {
		if len(cluster) > maxByteValue {
			return nil, fmt.Errorf("cluster %d is %d bytes; the format allows at most %d", i, len(cluster), maxByteValue)
		}

		// Pick the first fractal reference that survives the one-byte field.
		ref := -1
		for _, r := range fractalMap[i] {
			if r >= 0 && r <= maxByteValue {
				ref = r
				break
			}
		}
		_, inCodebook := codebook[i]

		var encodingType string
		switch {
		case ref >= 0:
			output.WriteByte(0xFE)
			output.WriteByte(byte(ref))
			encodingType = fmt.Sprintf("fractal ref=%d", ref)
			fractalCount++
		case inCodebook && i <= maxByteValue:
			output.WriteByte(0xFF)
			output.WriteByte(byte(i))
			encodingType = "codebook"
			codebookCount++
		default:
			output.WriteByte(0x00)
			output.WriteByte(byte(len(cluster)))
			output.Write(cluster)
			encodingType = "raw"
			rawCount++
		}

		// Debug: log the encoding type for clusters 0-4 and 275-280.
		if i < 5 || (i >= 275 && i <= 280) {
			debugPrintf("Encode cluster %d: type=%s\n", i, encodingType)
		}
	}

	debugPrintf("Encoding stats: raw=%d, codebook=%d, fractal=%d\n", rawCount, codebookCount, fractalCount)
	return output.Bytes(), nil
}
// Decompress reverses Compress and returns the original bytes.
//
// It reads the 20-byte big-endian header (DataLen, ClusterCount, CodebookLen,
// SpiralFactor), then CodebookLen codebook entries, then ClusterCount cluster
// records (0xFE fractal reference, 0xFF codebook reference, 0x00 raw bytes).
// Finally it regenerates the spiral order and writes the cluster bytes back
// to their original positions.
//
// Side effect: the receiver's spiralFactor is overwritten with the value
// stored in the header.
//
// The input is treated as untrusted. An error is returned, rather than
// panicking, allocating unbounded memory, or silently zero-filling, when the
// input is shorter than the header, when a record is truncated, when a marker
// or reference is invalid, when ClusterCount cannot fit in the remaining
// input, or when the clusters hold fewer bytes than DataLen declares.
func (c *MOSCCompressor) Decompress(compressed []byte) ([]byte, error) {
	// Three uint32 fields followed by one float64.
	const headerSize = 4 + 4 + 4 + 8
	if len(compressed) < headerSize {
		return nil, fmt.Errorf("invalid compressed data")
	}
	reader := bytes.NewReader(compressed)
	var header struct {
		DataLen      uint32
		ClusterCount uint32
		CodebookLen  uint32
		SpiralFactor float64
	}
	if err := binary.Read(reader, binary.BigEndian, &header); err != nil {
		return nil, err
	}
	c.spiralFactor = header.SpiralFactor

	// readPayload returns exactly n bytes from the input. A plain Read would
	// report success after copying fewer bytes than requested, which would
	// leave the rest of the buffer zero-filled.
	readPayload := func(n int) ([]byte, error) {
		if n > reader.Len() {
			return nil, fmt.Errorf("truncated input: need %d bytes, only %d remain", n, reader.Len())
		}
		buf := make([]byte, n)
		if n > 0 {
			if _, err := reader.Read(buf); err != nil {
				return nil, err
			}
		}
		return buf, nil
	}

	codebook := make(map[int][]byte)
	for i := 0; i < int(header.CodebookLen); i++ {
		code, err := reader.ReadByte()
		if err != nil {
			return nil, err
		}
		length, err := reader.ReadByte()
		if err != nil {
			return nil, err
		}
		seq, err := readPayload(int(length))
		if err != nil {
			return nil, err
		}
		codebook[int(code)] = seq
	}

	// Every cluster record occupies at least two bytes, so a count that cannot
	// fit in the remaining input is rejected before it sizes an allocation.
	if uint64(header.ClusterCount)*2 > uint64(reader.Len()) {
		return nil, fmt.Errorf("invalid cluster count %d for %d remaining bytes", header.ClusterCount, reader.Len())
	}
	clusters := make([][]byte, header.ClusterCount)
	total := 0
	for i := range clusters {
		marker, err := reader.ReadByte()
		if err != nil {
			return nil, err
		}
		switch marker {
		case 0xFE:
			ref, err := reader.ReadByte()
			if err != nil {
				return nil, err
			}
			// A reference may only point at a cluster that is already decoded.
			if int(ref) >= i {
				return nil, fmt.Errorf("invalid fractal reference: %d", ref)
			}
			clusters[i] = clusters[ref]
		case 0xFF:
			code, err := reader.ReadByte()
			if err != nil {
				return nil, err
			}
			seq, ok := codebook[int(code)]
			if !ok {
				return nil, fmt.Errorf("invalid codebook code: %d", code)
			}
			clusters[i] = seq
		case 0x00:
			length, err := reader.ReadByte()
			if err != nil {
				return nil, err
			}
			cluster, err := readPayload(int(length))
			if err != nil {
				return nil, err
			}
			clusters[i] = cluster
		default:
			return nil, fmt.Errorf("unknown marker: %x", marker)
		}
		total += len(clusters[i])

		// Debug: log clusters 0-4 and 275-280.
		if i < 5 || (i >= 275 && i <= 280) {
			debugPrintf("Decompress cluster %d: %v\n", i, clusters[i])
		}
	}

	// Check the declared length against the decoded bytes before it is used to
	// size the index table and the output buffer.
	if uint64(total) < uint64(header.DataLen) {
		return nil, fmt.Errorf("insufficient clusters: %d bytes decoded, header declares %d", total, header.DataLen)
	}

	spiralIndices := c.generateSpiralIndices(int(header.DataLen))
	data := make([]byte, header.DataLen)
	clusterIdx, clusterPos := 0, 0
	for i, idx := range spiralIndices {
		// Advance past exhausted clusters, including any zero-length ones.
		for clusterIdx < len(clusters) && clusterPos >= len(clusters[clusterIdx]) {
			clusterIdx++
			clusterPos = 0
		}
		if clusterIdx >= len(clusters) {
			return nil, fmt.Errorf("insufficient clusters at index %d", i)
		}
		if idx < 0 || idx >= len(data) {
			return nil, fmt.Errorf("invalid spiral index %d at position %d", idx, i)
		}
		data[idx] = clusters[clusterIdx][clusterPos]
		// Debug: log positions 0-9 and 2212-2222.
		if i < 10 || (i >= 2212 && i <= 2222) {
			debugPrintf("Position %d: idx=%d, clusterIdx=%d, clusterPos=%d, byte=%d\n", i, idx, clusterIdx, clusterPos, data[idx])
		}
		clusterPos++
	}

	logLen := len(data)
	if logLen > 100 {
		logLen = 100
	}
	debugPrintf("Decompressed first %d bytes: %v\n", logLen, data[:logLen])
	return data, nil
}