acommpelish jimeng AI refactor for PC

This commit is contained in:
GeekMaster
2025-09-12 18:58:52 +08:00
parent c5badb3e13
commit 2c6eee7fc1
13 changed files with 1049 additions and 266 deletions

817
api/utils/media_duration.go Normal file
View File

@@ -0,0 +1,817 @@
package utils
import (
"context"
"encoding/binary"
"errors"
"fmt"
"io"
"net/http"
"os"
"time"
)
// AudioDuration returns duration of an audio file.
// Supported formats: MP3, WAV (auto-detected by header)
func AudioDuration(path string) (time.Duration, error) {
f, err := os.Open(path)
if err != nil {
return 0, err
}
defer f.Close()
// Peek first 12 bytes to detect format
head := make([]byte, 12)
n, err := io.ReadFull(f, head)
if err != nil {
return 0, err
}
if n < 12 {
return 0, errors.New("file too small")
}
// WAV: RIFF....WAVE
if string(head[0:4]) == "RIFF" && string(head[8:12]) == "WAVE" {
if _, err := f.Seek(0, io.SeekStart); err != nil {
return 0, err
}
return wavDuration(f)
}
// MP3 can start with ID3 or frame sync 0xFFEx
if string(head[0:3]) == "ID3" || (head[0] == 0xFF && (head[1]&0xE0) == 0xE0) {
if _, err := f.Seek(0, io.SeekStart); err != nil {
return 0, err
}
return mp3Duration(f)
}
return 0, errors.New("unsupported audio format")
}
// AudioDurationFromURL downloads the url to a temp file and returns duration.
func AudioDurationFromURL(url string) (time.Duration, error) {
path, err := fetchURLToTemp(url, 30*time.Second)
if err != nil {
return 0, err
}
defer os.Remove(path)
return AudioDuration(path)
}
// VideoDurationMP4 returns duration of an MP4 file (MOV/MP4 base media).
func VideoDurationMP4(path string) (time.Duration, error) {
f, err := os.Open(path)
if err != nil {
return 0, err
}
defer f.Close()
return mp4Duration(f)
}
// VideoDurationMP4FromURL downloads the url to a temp file and returns duration.
func VideoDurationMP4FromURL(url string) (time.Duration, error) {
path, err := fetchURLToTemp(url, 30*time.Second)
if err != nil {
return 0, err
}
defer os.Remove(path)
return VideoDurationMP4(path)
}
// ---------------------- helpers ----------------------
func fetchURLToTemp(url string, timeout time.Duration) (string, error) {
ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
if err != nil {
return "", err
}
resp, err := http.DefaultClient.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("http status: %d", resp.StatusCode)
}
tmp, err := os.CreateTemp("", "media-*")
if err != nil {
return "", err
}
defer tmp.Close()
if _, err := io.Copy(tmp, resp.Body); err != nil {
path := tmp.Name()
_ = os.Remove(path)
return "", err
}
return tmp.Name(), nil
}
// ---------------------- WAV ----------------------
func wavDuration(r io.ReadSeeker) (time.Duration, error) {
// RIFF header already checked outside if needed. We parse chunks to get fmt and data.
// WAV little-endian
if _, err := r.Seek(0, io.SeekStart); err != nil {
return 0, err
}
// Read RIFF header (12 bytes)
head := make([]byte, 12)
if _, err := io.ReadFull(r, head); err != nil {
return 0, err
}
if string(head[0:4]) != "RIFF" || string(head[8:12]) != "WAVE" {
return 0, errors.New("invalid wav header")
}
var sampleRate uint32
var numChans uint16
var bitsPerSample uint16
var byteRate uint32
var dataSize uint32
for {
chunkHdr := make([]byte, 8)
if _, err := io.ReadFull(r, chunkHdr); err != nil {
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
break
}
return 0, err
}
ckID := string(chunkHdr[0:4])
ckSize := binary.LittleEndian.Uint32(chunkHdr[4:8])
switch ckID {
case "fmt ":
fmtData := make([]byte, ckSize)
if _, err := io.ReadFull(r, fmtData); err != nil {
return 0, err
}
// audioFormat := binary.LittleEndian.Uint16(fmtData[0:2]) // 1 = PCM
numChans = binary.LittleEndian.Uint16(fmtData[2:4])
sampleRate = binary.LittleEndian.Uint32(fmtData[4:8])
byteRate = binary.LittleEndian.Uint32(fmtData[8:12])
// blockAlign := binary.LittleEndian.Uint16(fmtData[12:14])
if len(fmtData) >= 16 {
bitsPerSample = binary.LittleEndian.Uint16(fmtData[14:16])
}
case "data":
dataSize = ckSize
// Skip data content
if _, err := r.Seek(int64(ckSize), io.SeekCurrent); err != nil {
return 0, err
}
default:
// Skip other chunks
if _, err := r.Seek(int64(ckSize), io.SeekCurrent); err != nil {
return 0, err
}
}
// Chunks are word-aligned (pad byte if odd size)
if ckSize%2 == 1 {
if _, err := r.Seek(1, io.SeekCurrent); err != nil { // skip pad
return 0, err
}
}
}
if sampleRate == 0 || numChans == 0 {
return 0, errors.New("invalid wav fmt")
}
var durationSeconds float64
if byteRate != 0 {
durationSeconds = float64(dataSize) / float64(byteRate)
} else {
bytesPerSec := float64(sampleRate) * float64(numChans) * float64(bitsPerSample) / 8.0
if bytesPerSec == 0 {
return 0, errors.New("invalid wav parameters")
}
durationSeconds = float64(dataSize) / bytesPerSec
}
return time.Duration(durationSeconds * float64(time.Second)), nil
}
// ---------------------- MP3 ----------------------
func mp3Duration(r io.ReadSeeker) (time.Duration, error) {
// Strategy:
// 1) Skip ID3v2 header if present.
// 2) Try read first frame and detect XING/Info or VBRI to get total frames and duration.
// 3) If VBR headers not present, fall back to CBR estimation: (audioDataBytes * 8) / bitrate.
// File size
fi, err := fileSizeFromSeeker(r)
if err != nil {
return 0, err
}
// Skip ID3v2
var id3v2Size int64
if _, err := r.Seek(0, io.SeekStart); err != nil {
return 0, err
}
id3v2Size, err = skipID3v2(r)
if err != nil {
return 0, err
}
// Remember audio start offset
startOffset, _ := r.Seek(0, io.SeekCurrent)
// Read first frame header (search sync)
off, fh, err := findNextMP3Frame(r)
if err != nil {
return 0, err
}
if _, err := r.Seek(off, io.SeekStart); err != nil {
return 0, err
}
// Check for XING/Info header in first frame (for VBR)
totalFrames, sampleRate, samplesPerFrame, bitrateKbps, vbrFound, err := parseFirstFrameForVBR(r, fh)
if err != nil {
return 0, err
}
if vbrFound && totalFrames > 0 && sampleRate > 0 && samplesPerFrame > 0 {
seconds := (float64(totalFrames) * float64(samplesPerFrame)) / float64(sampleRate)
return time.Duration(seconds * float64(time.Second)), nil
}
// Fall back to CBR estimate using bitrate and data size (excluding ID3v2 and ID3v1)
// Detect ID3v1 at end (128 bytes TAG)
var id3v1Size int64
if fi >= 128 {
if _, err := r.Seek(fi-128, io.SeekStart); err == nil {
buf := make([]byte, 3)
if _, err := io.ReadFull(r, buf); err == nil {
if string(buf) == "TAG" {
id3v1Size = 128
}
}
}
}
audioBytes := fi - id3v2Size - id3v1Size - startOffset
if audioBytes <= 0 || bitrateKbps == 0 {
return 0, errors.New("unable to estimate mp3 duration")
}
// bitrateKbps in kbps, bytes -> bits
seconds := float64(audioBytes*8) / float64(bitrateKbps*1000)
return time.Duration(seconds * float64(time.Second)), nil
}
type mp3FrameHeader struct {
Version int // 1: MPEG1, 2: MPEG2, 25: MPEG2.5
Layer int // 1,2,3
BitrateKbps int
SampleRate int
Padding int
ChannelMode int // 0:Stereo,1:Joint,2:Dual,3:Mono
}
func findNextMP3Frame(r io.ReadSeeker) (int64, mp3FrameHeader, error) {
var hdr mp3FrameHeader
// Start from current pos and scan up to 64KB
start, _ := r.Seek(0, io.SeekCurrent)
limit := int64(64 * 1024)
buf := make([]byte, limit)
n, err := r.Read(buf)
if err != nil && !errors.Is(err, io.EOF) {
return 0, hdr, err
}
for i := 0; i+4 <= n; i++ {
if buf[i] == 0xFF && (buf[i+1]&0xE0) == 0xE0 { // sync
if h, ok := parseMP3Header(buf[i : i+4]); ok {
offset := start + int64(i)
return offset, h, nil
}
}
}
return 0, hdr, errors.New("mp3 frame not found")
}
func parseMP3Header(b []byte) (mp3FrameHeader, bool) {
var h mp3FrameHeader
if len(b) < 4 {
return h, false
}
if b[0] != 0xFF || (b[1]&0xE0) != 0xE0 {
return h, false
}
versionBits := (b[1] >> 3) & 0x03
layerBits := (b[1] >> 1) & 0x03
bitrateBits := (b[2] >> 4) & 0x0F
sampleRateBits := (b[2] >> 2) & 0x03
paddingBit := (b[2] >> 1) & 0x01
channelMode := (b[3] >> 6) & 0x03
var version int
switch versionBits {
case 0x00:
version = 25 // MPEG 2.5
case 0x02:
version = 2 // MPEG 2
case 0x03:
version = 1 // MPEG 1
default:
return h, false
}
var layer int
switch layerBits {
case 0x01:
layer = 3
case 0x02:
layer = 2
case 0x03:
layer = 1
default:
return h, false
}
br := mp3BitrateKbps(version, layer, int(bitrateBits))
if br == 0 {
return h, false
}
sr := mp3SampleRate(version, int(sampleRateBits))
if sr == 0 {
return h, false
}
h = mp3FrameHeader{
Version: version,
Layer: layer,
BitrateKbps: br,
SampleRate: sr,
Padding: int(paddingBit),
ChannelMode: int(channelMode),
}
return h, true
}
func mp3BitrateKbps(version, layer, index int) int {
// index: 1..14 valid; 0,15 invalid
if index <= 0 || index == 15 {
return 0
}
// Tables per ISO/IEC 11172-3/13818-3 (common subset)
var tbl [15]int
if layer == 1 { // Layer I
if version == 1 { // MPEG1
tbl = [15]int{0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448}
} else { // MPEG2/2.5
tbl = [15]int{0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256}
}
} else if layer == 2 { // Layer II
if version == 1 {
tbl = [15]int{0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384}
} else {
tbl = [15]int{0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160}
}
} else { // Layer III
if version == 1 {
tbl = [15]int{0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320}
} else {
tbl = [15]int{0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160}
}
}
return tbl[index]
}
func mp3SampleRate(version, index int) int {
if index == 3 {
return 0
}
// base table for MPEG1
base := [3]int{44100, 48000, 32000}
sr := base[index]
if version == 2 { // MPEG2
sr /= 2
} else if version == 25 { // MPEG2.5
sr /= 4
}
return sr
}
func samplesPerMP3Frame(version, layer int) int {
switch layer {
case 1:
return 384
case 2:
return 1152
case 3:
if version == 1 {
return 1152
}
return 576 // MPEG2/2.5 Layer III
default:
return 0
}
}
func parseFirstFrameForVBR(r io.ReadSeeker, fh mp3FrameHeader) (totalFrames uint32, sampleRate int, samplesPerFrame int, bitrateKbps int, vbrFound bool, err error) {
// After the 4-byte header, possible side info and then XING/Info
if _, err = r.Seek(0, io.SeekCurrent); err != nil {
return
}
// Re-read header
hdr := make([]byte, 4)
if _, err = io.ReadFull(r, hdr); err != nil {
return
}
// side info size depends on MPEG version and channel mode (for Layer III)
sideInfoSize := 0
if fh.Layer == 3 { // Layer III
if fh.Version == 1 { // MPEG1
if fh.ChannelMode == 3 { // mono
sideInfoSize = 17
} else {
sideInfoSize = 32
}
} else { // MPEG2/2.5
if fh.ChannelMode == 3 {
sideInfoSize = 9
} else {
sideInfoSize = 17
}
}
}
// Read next up to 120 bytes to search for XING/Info or VBRI
buf := make([]byte, sideInfoSize+120)
if _, err = io.ReadFull(r, buf); err != nil {
// If short, still try within available
if !errors.Is(err, io.ErrUnexpectedEOF) && !errors.Is(err, io.EOF) {
return
}
}
// Search XING/Info signature
sigs := [][]byte{[]byte("Xing"), []byte("Info")}
for _, sig := range sigs {
idx := indexOf(buf, sig)
if idx >= 0 {
// flags after signature (4 bytes), then if frames flag set, 4 bytes frames
if len(buf) >= idx+4+4 {
flags := binary.BigEndian.Uint32(buf[idx+4 : idx+8])
var frames uint32
if (flags & 0x01) != 0 { // frames present
if len(buf) >= idx+8+4 {
frames = binary.BigEndian.Uint32(buf[idx+8 : idx+12])
}
}
if frames > 0 {
vbrFound = true
totalFrames = frames
sampleRate = fh.SampleRate
samplesPerFrame = samplesPerMP3Frame(fh.Version, fh.Layer)
bitrateKbps = fh.BitrateKbps
return
}
}
}
}
// Check VBRI (usually at 32 bytes after header for MPEG1 Layer III)
if len(buf) >= 4 {
idx := indexOf(buf, []byte("VBRI"))
if idx >= 0 {
if len(buf) >= idx+4+2+2+4+4 {
// VBRI layout: 'VBRI'(4) + version(2) + delay(2) + quality(2?) varies; but at offset 10 comes bytes: bytes (4), frames (4)
// Some docs: offset 10: bytes, offset 14: frames (big-endian)
bytesOffset := idx + 10
framesOffset := idx + 14
if len(buf) >= framesOffset+4 {
frames := binary.BigEndian.Uint32(buf[framesOffset : framesOffset+4])
if frames > 0 {
vbrFound = true
totalFrames = frames
sampleRate = fh.SampleRate
samplesPerFrame = samplesPerMP3Frame(fh.Version, fh.Layer)
bitrateKbps = fh.BitrateKbps
_ = bytesOffset // not used
return
}
}
}
}
}
// No VBR header. Provide header info for CBR fallback
sampleRate = fh.SampleRate
samplesPerFrame = samplesPerMP3Frame(fh.Version, fh.Layer)
bitrateKbps = fh.BitrateKbps
return
}
func indexOf(haystack []byte, needle []byte) int {
for i := 0; i+len(needle) <= len(haystack); i++ {
match := true
for j := 0; j < len(needle); j++ {
if haystack[i+j] != needle[j] {
match = false
break
}
}
if match {
return i
}
}
return -1
}
func skipID3v2(r io.ReadSeeker) (int64, error) {
if _, err := r.Seek(0, io.SeekStart); err != nil {
return 0, err
}
head := make([]byte, 10)
if _, err := io.ReadFull(r, head); err != nil {
return 0, nil // no header
}
if string(head[0:3]) != "ID3" {
if _, err := r.Seek(0, io.SeekStart); err != nil {
return 0, err
}
return 0, nil
}
// size: 4 synchsafe bytes
sz := int64((int(head[6]&0x7F) << 21) | (int(head[7]&0x7F) << 14) | (int(head[8]&0x7F) << 7) | int(head[9]&0x7F))
// total header size = 10 + sz (+ footer 10 if flag set)
footer := int64(0)
if (head[5] & 0x10) != 0 { // footer present
footer = 10
}
total := 10 + sz + footer
if _, err := r.Seek(total, io.SeekStart); err != nil {
return 0, err
}
return total, nil
}
func fileSizeFromSeeker(r io.ReadSeeker) (int64, error) {
cur, err := r.Seek(0, io.SeekCurrent)
if err != nil {
return 0, err
}
end, err := r.Seek(0, io.SeekEnd)
if err != nil {
return 0, err
}
if _, err := r.Seek(cur, io.SeekStart); err != nil {
return 0, err
}
return end, nil
}
// ---------------------- MP4 ----------------------
type mp4BoxHeader struct {
Size uint64
Type [4]byte
}
func readBoxHeader(r io.ReadSeeker) (mp4BoxHeader, error) {
var h mp4BoxHeader
buf := make([]byte, 8)
if _, err := io.ReadFull(r, buf); err != nil {
return h, err
}
sz := binary.BigEndian.Uint32(buf[0:4])
copy(h.Type[:], buf[4:8])
if sz == 1 {
// 64-bit size follows
ext := make([]byte, 8)
if _, err := io.ReadFull(r, ext); err != nil {
return h, err
}
h.Size = binary.BigEndian.Uint64(ext)
} else {
h.Size = uint64(sz)
}
return h, nil
}
func skipBox(r io.ReadSeeker, boxSize uint64, alreadyRead int64) error {
toSkip := int64(boxSize) - alreadyRead
if toSkip < 0 {
return fmt.Errorf("invalid box size")
}
_, err := r.Seek(toSkip, io.SeekCurrent)
return err
}
func mp4Duration(r io.ReadSeeker) (time.Duration, error) {
if _, err := r.Seek(0, io.SeekStart); err != nil {
return 0, err
}
// Find moov box
var moovStart int64
var moovSize uint64
for {
pos, _ := r.Seek(0, io.SeekCurrent)
h, err := readBoxHeader(r)
if err != nil {
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
break
}
return 0, err
}
if string(h.Type[:]) == "moov" {
moovStart = pos
moovSize = h.Size
break
}
if h.Size < 8 {
return 0, errors.New("invalid mp4 box size")
}
if err := skipBox(r, h.Size, 8); err != nil {
return 0, err
}
}
if moovStart == 0 && moovSize == 0 {
return 0, errors.New("moov not found")
}
// Parse inside moov for video trak mdhd, else mvhd
if _, err := r.Seek(moovStart+8, io.SeekStart); err != nil { // skip moov header
return 0, err
}
end := moovStart + int64(moovSize)
var movieTimescale uint32
var movieDuration uint64
var foundVideoMdhd bool
var mdhdTimescale uint32
var mdhdDuration uint64
for {
pos, _ := r.Seek(0, io.SeekCurrent)
if pos >= end {
break
}
h, err := readBoxHeader(r)
if err != nil {
return 0, err
}
switch string(h.Type[:]) {
case "mvhd":
// movie header
ver := make([]byte, 1)
if _, err := io.ReadFull(r, ver); err != nil {
return 0, err
}
if _, err := r.Seek(3, io.SeekCurrent); err != nil { // flags
return 0, err
}
if ver[0] == 1 {
// version 1: 64-bit duration
buf := make([]byte, 8+8+4+8) // ctime(8) mtime(8) timescale(4) duration(8)
if _, err := io.ReadFull(r, buf); err != nil {
return 0, err
}
movieTimescale = binary.BigEndian.Uint32(buf[16:20])
movieDuration = binary.BigEndian.Uint64(buf[20:28])
} else {
buf := make([]byte, 4+4+4+4) // ctime mtime timescale duration
if _, err := io.ReadFull(r, buf); err != nil {
return 0, err
}
movieTimescale = binary.BigEndian.Uint32(buf[8:12])
movieDuration = uint64(binary.BigEndian.Uint32(buf[12:16]))
}
// skip rest of mvhd
read := int64(1 + 3)
if ver[0] == 1 {
read += int64(8 + 8 + 4 + 8)
} else {
read += int64(4 + 4 + 4 + 4)
}
if err := skipBox(r, h.Size, 8+read); err != nil {
return 0, err
}
case "trak":
// parse trak for hdlr and mdhd
tEnd := int64(0)
if h.Size < 8 {
return 0, errors.New("invalid trak size")
}
tEnd = pos + int64(h.Size)
var isVideo bool
var tMdhdTimescale uint32
var tMdhdDuration uint64
for {
cpos, _ := r.Seek(0, io.SeekCurrent)
if cpos >= tEnd {
break
}
ch, err := readBoxHeader(r)
if err != nil {
return 0, err
}
switch string(ch.Type[:]) {
case "mdia":
mEnd := cpos + int64(ch.Size)
for {
mpos, _ := r.Seek(0, io.SeekCurrent)
if mpos >= mEnd {
break
}
mh, err := readBoxHeader(r)
if err != nil {
return 0, err
}
switch string(mh.Type[:]) {
case "hdlr":
// skip version+flags (4), pre_defined(4)
if _, err := r.Seek(8, io.SeekCurrent); err != nil {
return 0, err
}
handler := make([]byte, 4)
if _, err := io.ReadFull(r, handler); err != nil {
return 0, err
}
if string(handler) == "vide" {
isVideo = true
}
if err := skipBox(r, mh.Size, 8+8+4); err != nil { // header + skipped + read handler
return 0, err
}
case "mdhd":
ver := make([]byte, 1)
if _, err := io.ReadFull(r, ver); err != nil {
return 0, err
}
if _, err := r.Seek(3, io.SeekCurrent); err != nil { // flags
return 0, err
}
if ver[0] == 1 {
buf := make([]byte, 8+8+4+8)
if _, err := io.ReadFull(r, buf); err != nil {
return 0, err
}
tMdhdTimescale = binary.BigEndian.Uint32(buf[16:20])
tMdhdDuration = binary.BigEndian.Uint64(buf[20:28])
} else {
buf := make([]byte, 4+4+4+4)
if _, err := io.ReadFull(r, buf); err != nil {
return 0, err
}
tMdhdTimescale = binary.BigEndian.Uint32(buf[8:12])
tMdhdDuration = uint64(binary.BigEndian.Uint32(buf[12:16]))
}
if err := skipBox(r, mh.Size, 8+1+3+int64(lenVersionPayload(ver[0]))); err != nil {
return 0, err
}
default:
if err := skipBox(r, mh.Size, 8); err != nil {
return 0, err
}
}
}
default:
if err := skipBox(r, ch.Size, 8); err != nil {
return 0, err
}
}
}
if isVideo && tMdhdTimescale != 0 && tMdhdDuration != 0 {
foundVideoMdhd = true
mdhdTimescale = tMdhdTimescale
mdhdDuration = tMdhdDuration
}
// Skip remaining of trak if any
if _, err := r.Seek(tEnd, io.SeekStart); err != nil {
return 0, err
}
default:
if err := skipBox(r, h.Size, 8); err != nil {
return 0, err
}
}
}
if foundVideoMdhd && mdhdTimescale != 0 {
sec := float64(mdhdDuration) / float64(mdhdTimescale)
return time.Duration(sec * float64(time.Second)), nil
}
if movieTimescale != 0 {
sec := float64(movieDuration) / float64(movieTimescale)
return time.Duration(sec * float64(time.Second)), nil
}
return 0, errors.New("failed to read mp4 duration")
}
func lenVersionPayload(ver byte) int {
if ver == 1 {
return 8 + 8 + 4 + 8
}
return 4 + 4 + 4 + 4
}