mirror of
https://github.com/yangjian102621/geekai.git
synced 2026-05-10 19:54:25 +08:00
acommpelish jimeng AI refactor for PC
This commit is contained in:
817
api/utils/media_duration.go
Normal file
817
api/utils/media_duration.go
Normal file
@@ -0,0 +1,817 @@
|
||||
package utils
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"time"
|
||||
)
|
||||
|
||||
// AudioDuration returns duration of an audio file.
|
||||
// Supported formats: MP3, WAV (auto-detected by header)
|
||||
func AudioDuration(path string) (time.Duration, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
// Peek first 12 bytes to detect format
|
||||
head := make([]byte, 12)
|
||||
n, err := io.ReadFull(f, head)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if n < 12 {
|
||||
return 0, errors.New("file too small")
|
||||
}
|
||||
|
||||
// WAV: RIFF....WAVE
|
||||
if string(head[0:4]) == "RIFF" && string(head[8:12]) == "WAVE" {
|
||||
if _, err := f.Seek(0, io.SeekStart); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return wavDuration(f)
|
||||
}
|
||||
|
||||
// MP3 can start with ID3 or frame sync 0xFFEx
|
||||
if string(head[0:3]) == "ID3" || (head[0] == 0xFF && (head[1]&0xE0) == 0xE0) {
|
||||
if _, err := f.Seek(0, io.SeekStart); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return mp3Duration(f)
|
||||
}
|
||||
|
||||
return 0, errors.New("unsupported audio format")
|
||||
}
|
||||
|
||||
// AudioDurationFromURL downloads the url to a temp file and returns duration.
|
||||
func AudioDurationFromURL(url string) (time.Duration, error) {
|
||||
path, err := fetchURLToTemp(url, 30*time.Second)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
defer os.Remove(path)
|
||||
return AudioDuration(path)
|
||||
}
|
||||
|
||||
// VideoDurationMP4 returns duration of an MP4 file (MOV/MP4 base media).
|
||||
func VideoDurationMP4(path string) (time.Duration, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
return mp4Duration(f)
|
||||
}
|
||||
|
||||
// VideoDurationMP4FromURL downloads the url to a temp file and returns duration.
|
||||
func VideoDurationMP4FromURL(url string) (time.Duration, error) {
|
||||
path, err := fetchURLToTemp(url, 30*time.Second)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
defer os.Remove(path)
|
||||
return VideoDurationMP4(path)
|
||||
}
|
||||
|
||||
// ---------------------- helpers ----------------------
|
||||
|
||||
func fetchURLToTemp(url string, timeout time.Duration) (string, error) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), timeout)
|
||||
defer cancel()
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return "", fmt.Errorf("http status: %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
tmp, err := os.CreateTemp("", "media-*")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer tmp.Close()
|
||||
|
||||
if _, err := io.Copy(tmp, resp.Body); err != nil {
|
||||
path := tmp.Name()
|
||||
_ = os.Remove(path)
|
||||
return "", err
|
||||
}
|
||||
return tmp.Name(), nil
|
||||
}
|
||||
|
||||
// ---------------------- WAV ----------------------
|
||||
|
||||
func wavDuration(r io.ReadSeeker) (time.Duration, error) {
|
||||
// RIFF header already checked outside if needed. We parse chunks to get fmt and data.
|
||||
// WAV little-endian
|
||||
if _, err := r.Seek(0, io.SeekStart); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// Read RIFF header (12 bytes)
|
||||
head := make([]byte, 12)
|
||||
if _, err := io.ReadFull(r, head); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if string(head[0:4]) != "RIFF" || string(head[8:12]) != "WAVE" {
|
||||
return 0, errors.New("invalid wav header")
|
||||
}
|
||||
|
||||
var sampleRate uint32
|
||||
var numChans uint16
|
||||
var bitsPerSample uint16
|
||||
var byteRate uint32
|
||||
var dataSize uint32
|
||||
|
||||
for {
|
||||
chunkHdr := make([]byte, 8)
|
||||
if _, err := io.ReadFull(r, chunkHdr); err != nil {
|
||||
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
|
||||
break
|
||||
}
|
||||
return 0, err
|
||||
}
|
||||
ckID := string(chunkHdr[0:4])
|
||||
ckSize := binary.LittleEndian.Uint32(chunkHdr[4:8])
|
||||
|
||||
switch ckID {
|
||||
case "fmt ":
|
||||
fmtData := make([]byte, ckSize)
|
||||
if _, err := io.ReadFull(r, fmtData); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
// audioFormat := binary.LittleEndian.Uint16(fmtData[0:2]) // 1 = PCM
|
||||
numChans = binary.LittleEndian.Uint16(fmtData[2:4])
|
||||
sampleRate = binary.LittleEndian.Uint32(fmtData[4:8])
|
||||
byteRate = binary.LittleEndian.Uint32(fmtData[8:12])
|
||||
// blockAlign := binary.LittleEndian.Uint16(fmtData[12:14])
|
||||
if len(fmtData) >= 16 {
|
||||
bitsPerSample = binary.LittleEndian.Uint16(fmtData[14:16])
|
||||
}
|
||||
case "data":
|
||||
dataSize = ckSize
|
||||
// Skip data content
|
||||
if _, err := r.Seek(int64(ckSize), io.SeekCurrent); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
default:
|
||||
// Skip other chunks
|
||||
if _, err := r.Seek(int64(ckSize), io.SeekCurrent); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
// Chunks are word-aligned (pad byte if odd size)
|
||||
if ckSize%2 == 1 {
|
||||
if _, err := r.Seek(1, io.SeekCurrent); err != nil { // skip pad
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if sampleRate == 0 || numChans == 0 {
|
||||
return 0, errors.New("invalid wav fmt")
|
||||
}
|
||||
|
||||
var durationSeconds float64
|
||||
if byteRate != 0 {
|
||||
durationSeconds = float64(dataSize) / float64(byteRate)
|
||||
} else {
|
||||
bytesPerSec := float64(sampleRate) * float64(numChans) * float64(bitsPerSample) / 8.0
|
||||
if bytesPerSec == 0 {
|
||||
return 0, errors.New("invalid wav parameters")
|
||||
}
|
||||
durationSeconds = float64(dataSize) / bytesPerSec
|
||||
}
|
||||
return time.Duration(durationSeconds * float64(time.Second)), nil
|
||||
}
|
||||
|
||||
// ---------------------- MP3 ----------------------
|
||||
|
||||
func mp3Duration(r io.ReadSeeker) (time.Duration, error) {
|
||||
// Strategy:
|
||||
// 1) Skip ID3v2 header if present.
|
||||
// 2) Try read first frame and detect XING/Info or VBRI to get total frames and duration.
|
||||
// 3) If VBR headers not present, fall back to CBR estimation: (audioDataBytes * 8) / bitrate.
|
||||
|
||||
// File size
|
||||
fi, err := fileSizeFromSeeker(r)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// Skip ID3v2
|
||||
var id3v2Size int64
|
||||
if _, err := r.Seek(0, io.SeekStart); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
id3v2Size, err = skipID3v2(r)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// Remember audio start offset
|
||||
startOffset, _ := r.Seek(0, io.SeekCurrent)
|
||||
|
||||
// Read first frame header (search sync)
|
||||
off, fh, err := findNextMP3Frame(r)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if _, err := r.Seek(off, io.SeekStart); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// Check for XING/Info header in first frame (for VBR)
|
||||
totalFrames, sampleRate, samplesPerFrame, bitrateKbps, vbrFound, err := parseFirstFrameForVBR(r, fh)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if vbrFound && totalFrames > 0 && sampleRate > 0 && samplesPerFrame > 0 {
|
||||
seconds := (float64(totalFrames) * float64(samplesPerFrame)) / float64(sampleRate)
|
||||
return time.Duration(seconds * float64(time.Second)), nil
|
||||
}
|
||||
|
||||
// Fall back to CBR estimate using bitrate and data size (excluding ID3v2 and ID3v1)
|
||||
// Detect ID3v1 at end (128 bytes TAG)
|
||||
var id3v1Size int64
|
||||
if fi >= 128 {
|
||||
if _, err := r.Seek(fi-128, io.SeekStart); err == nil {
|
||||
buf := make([]byte, 3)
|
||||
if _, err := io.ReadFull(r, buf); err == nil {
|
||||
if string(buf) == "TAG" {
|
||||
id3v1Size = 128
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
audioBytes := fi - id3v2Size - id3v1Size - startOffset
|
||||
if audioBytes <= 0 || bitrateKbps == 0 {
|
||||
return 0, errors.New("unable to estimate mp3 duration")
|
||||
}
|
||||
// bitrateKbps in kbps, bytes -> bits
|
||||
seconds := float64(audioBytes*8) / float64(bitrateKbps*1000)
|
||||
return time.Duration(seconds * float64(time.Second)), nil
|
||||
}
|
||||
|
||||
type mp3FrameHeader struct {
|
||||
Version int // 1: MPEG1, 2: MPEG2, 25: MPEG2.5
|
||||
Layer int // 1,2,3
|
||||
BitrateKbps int
|
||||
SampleRate int
|
||||
Padding int
|
||||
ChannelMode int // 0:Stereo,1:Joint,2:Dual,3:Mono
|
||||
}
|
||||
|
||||
func findNextMP3Frame(r io.ReadSeeker) (int64, mp3FrameHeader, error) {
|
||||
var hdr mp3FrameHeader
|
||||
// Start from current pos and scan up to 64KB
|
||||
start, _ := r.Seek(0, io.SeekCurrent)
|
||||
limit := int64(64 * 1024)
|
||||
buf := make([]byte, limit)
|
||||
n, err := r.Read(buf)
|
||||
if err != nil && !errors.Is(err, io.EOF) {
|
||||
return 0, hdr, err
|
||||
}
|
||||
for i := 0; i+4 <= n; i++ {
|
||||
if buf[i] == 0xFF && (buf[i+1]&0xE0) == 0xE0 { // sync
|
||||
if h, ok := parseMP3Header(buf[i : i+4]); ok {
|
||||
offset := start + int64(i)
|
||||
return offset, h, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0, hdr, errors.New("mp3 frame not found")
|
||||
}
|
||||
|
||||
func parseMP3Header(b []byte) (mp3FrameHeader, bool) {
|
||||
var h mp3FrameHeader
|
||||
if len(b) < 4 {
|
||||
return h, false
|
||||
}
|
||||
if b[0] != 0xFF || (b[1]&0xE0) != 0xE0 {
|
||||
return h, false
|
||||
}
|
||||
versionBits := (b[1] >> 3) & 0x03
|
||||
layerBits := (b[1] >> 1) & 0x03
|
||||
bitrateBits := (b[2] >> 4) & 0x0F
|
||||
sampleRateBits := (b[2] >> 2) & 0x03
|
||||
paddingBit := (b[2] >> 1) & 0x01
|
||||
channelMode := (b[3] >> 6) & 0x03
|
||||
|
||||
var version int
|
||||
switch versionBits {
|
||||
case 0x00:
|
||||
version = 25 // MPEG 2.5
|
||||
case 0x02:
|
||||
version = 2 // MPEG 2
|
||||
case 0x03:
|
||||
version = 1 // MPEG 1
|
||||
default:
|
||||
return h, false
|
||||
}
|
||||
|
||||
var layer int
|
||||
switch layerBits {
|
||||
case 0x01:
|
||||
layer = 3
|
||||
case 0x02:
|
||||
layer = 2
|
||||
case 0x03:
|
||||
layer = 1
|
||||
default:
|
||||
return h, false
|
||||
}
|
||||
|
||||
br := mp3BitrateKbps(version, layer, int(bitrateBits))
|
||||
if br == 0 {
|
||||
return h, false
|
||||
}
|
||||
sr := mp3SampleRate(version, int(sampleRateBits))
|
||||
if sr == 0 {
|
||||
return h, false
|
||||
}
|
||||
|
||||
h = mp3FrameHeader{
|
||||
Version: version,
|
||||
Layer: layer,
|
||||
BitrateKbps: br,
|
||||
SampleRate: sr,
|
||||
Padding: int(paddingBit),
|
||||
ChannelMode: int(channelMode),
|
||||
}
|
||||
return h, true
|
||||
}
|
||||
|
||||
func mp3BitrateKbps(version, layer, index int) int {
|
||||
// index: 1..14 valid; 0,15 invalid
|
||||
if index <= 0 || index == 15 {
|
||||
return 0
|
||||
}
|
||||
// Tables per ISO/IEC 11172-3/13818-3 (common subset)
|
||||
var tbl [15]int
|
||||
if layer == 1 { // Layer I
|
||||
if version == 1 { // MPEG1
|
||||
tbl = [15]int{0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448}
|
||||
} else { // MPEG2/2.5
|
||||
tbl = [15]int{0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256}
|
||||
}
|
||||
} else if layer == 2 { // Layer II
|
||||
if version == 1 {
|
||||
tbl = [15]int{0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384}
|
||||
} else {
|
||||
tbl = [15]int{0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160}
|
||||
}
|
||||
} else { // Layer III
|
||||
if version == 1 {
|
||||
tbl = [15]int{0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320}
|
||||
} else {
|
||||
tbl = [15]int{0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160}
|
||||
}
|
||||
}
|
||||
return tbl[index]
|
||||
}
|
||||
|
||||
func mp3SampleRate(version, index int) int {
|
||||
if index == 3 {
|
||||
return 0
|
||||
}
|
||||
// base table for MPEG1
|
||||
base := [3]int{44100, 48000, 32000}
|
||||
sr := base[index]
|
||||
if version == 2 { // MPEG2
|
||||
sr /= 2
|
||||
} else if version == 25 { // MPEG2.5
|
||||
sr /= 4
|
||||
}
|
||||
return sr
|
||||
}
|
||||
|
||||
func samplesPerMP3Frame(version, layer int) int {
|
||||
switch layer {
|
||||
case 1:
|
||||
return 384
|
||||
case 2:
|
||||
return 1152
|
||||
case 3:
|
||||
if version == 1 {
|
||||
return 1152
|
||||
}
|
||||
return 576 // MPEG2/2.5 Layer III
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
func parseFirstFrameForVBR(r io.ReadSeeker, fh mp3FrameHeader) (totalFrames uint32, sampleRate int, samplesPerFrame int, bitrateKbps int, vbrFound bool, err error) {
|
||||
// After the 4-byte header, possible side info and then XING/Info
|
||||
if _, err = r.Seek(0, io.SeekCurrent); err != nil {
|
||||
return
|
||||
}
|
||||
// Re-read header
|
||||
hdr := make([]byte, 4)
|
||||
if _, err = io.ReadFull(r, hdr); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// side info size depends on MPEG version and channel mode (for Layer III)
|
||||
sideInfoSize := 0
|
||||
if fh.Layer == 3 { // Layer III
|
||||
if fh.Version == 1 { // MPEG1
|
||||
if fh.ChannelMode == 3 { // mono
|
||||
sideInfoSize = 17
|
||||
} else {
|
||||
sideInfoSize = 32
|
||||
}
|
||||
} else { // MPEG2/2.5
|
||||
if fh.ChannelMode == 3 {
|
||||
sideInfoSize = 9
|
||||
} else {
|
||||
sideInfoSize = 17
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Read next up to 120 bytes to search for XING/Info or VBRI
|
||||
buf := make([]byte, sideInfoSize+120)
|
||||
if _, err = io.ReadFull(r, buf); err != nil {
|
||||
// If short, still try within available
|
||||
if !errors.Is(err, io.ErrUnexpectedEOF) && !errors.Is(err, io.EOF) {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Search XING/Info signature
|
||||
sigs := [][]byte{[]byte("Xing"), []byte("Info")}
|
||||
for _, sig := range sigs {
|
||||
idx := indexOf(buf, sig)
|
||||
if idx >= 0 {
|
||||
// flags after signature (4 bytes), then if frames flag set, 4 bytes frames
|
||||
if len(buf) >= idx+4+4 {
|
||||
flags := binary.BigEndian.Uint32(buf[idx+4 : idx+8])
|
||||
var frames uint32
|
||||
if (flags & 0x01) != 0 { // frames present
|
||||
if len(buf) >= idx+8+4 {
|
||||
frames = binary.BigEndian.Uint32(buf[idx+8 : idx+12])
|
||||
}
|
||||
}
|
||||
if frames > 0 {
|
||||
vbrFound = true
|
||||
totalFrames = frames
|
||||
sampleRate = fh.SampleRate
|
||||
samplesPerFrame = samplesPerMP3Frame(fh.Version, fh.Layer)
|
||||
bitrateKbps = fh.BitrateKbps
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check VBRI (usually at 32 bytes after header for MPEG1 Layer III)
|
||||
if len(buf) >= 4 {
|
||||
idx := indexOf(buf, []byte("VBRI"))
|
||||
if idx >= 0 {
|
||||
if len(buf) >= idx+4+2+2+4+4 {
|
||||
// VBRI layout: 'VBRI'(4) + version(2) + delay(2) + quality(2?) varies; but at offset 10 comes bytes: bytes (4), frames (4)
|
||||
// Some docs: offset 10: bytes, offset 14: frames (big-endian)
|
||||
bytesOffset := idx + 10
|
||||
framesOffset := idx + 14
|
||||
if len(buf) >= framesOffset+4 {
|
||||
frames := binary.BigEndian.Uint32(buf[framesOffset : framesOffset+4])
|
||||
if frames > 0 {
|
||||
vbrFound = true
|
||||
totalFrames = frames
|
||||
sampleRate = fh.SampleRate
|
||||
samplesPerFrame = samplesPerMP3Frame(fh.Version, fh.Layer)
|
||||
bitrateKbps = fh.BitrateKbps
|
||||
_ = bytesOffset // not used
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// No VBR header. Provide header info for CBR fallback
|
||||
sampleRate = fh.SampleRate
|
||||
samplesPerFrame = samplesPerMP3Frame(fh.Version, fh.Layer)
|
||||
bitrateKbps = fh.BitrateKbps
|
||||
return
|
||||
}
|
||||
|
||||
func indexOf(haystack []byte, needle []byte) int {
|
||||
for i := 0; i+len(needle) <= len(haystack); i++ {
|
||||
match := true
|
||||
for j := 0; j < len(needle); j++ {
|
||||
if haystack[i+j] != needle[j] {
|
||||
match = false
|
||||
break
|
||||
}
|
||||
}
|
||||
if match {
|
||||
return i
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
func skipID3v2(r io.ReadSeeker) (int64, error) {
|
||||
if _, err := r.Seek(0, io.SeekStart); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
head := make([]byte, 10)
|
||||
if _, err := io.ReadFull(r, head); err != nil {
|
||||
return 0, nil // no header
|
||||
}
|
||||
if string(head[0:3]) != "ID3" {
|
||||
if _, err := r.Seek(0, io.SeekStart); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return 0, nil
|
||||
}
|
||||
// size: 4 synchsafe bytes
|
||||
sz := int64((int(head[6]&0x7F) << 21) | (int(head[7]&0x7F) << 14) | (int(head[8]&0x7F) << 7) | int(head[9]&0x7F))
|
||||
// total header size = 10 + sz (+ footer 10 if flag set)
|
||||
footer := int64(0)
|
||||
if (head[5] & 0x10) != 0 { // footer present
|
||||
footer = 10
|
||||
}
|
||||
total := 10 + sz + footer
|
||||
if _, err := r.Seek(total, io.SeekStart); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return total, nil
|
||||
}
|
||||
|
||||
func fileSizeFromSeeker(r io.ReadSeeker) (int64, error) {
|
||||
cur, err := r.Seek(0, io.SeekCurrent)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
end, err := r.Seek(0, io.SeekEnd)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if _, err := r.Seek(cur, io.SeekStart); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return end, nil
|
||||
}
|
||||
|
||||
// ---------------------- MP4 ----------------------
|
||||
|
||||
type mp4BoxHeader struct {
|
||||
Size uint64
|
||||
Type [4]byte
|
||||
}
|
||||
|
||||
func readBoxHeader(r io.ReadSeeker) (mp4BoxHeader, error) {
|
||||
var h mp4BoxHeader
|
||||
buf := make([]byte, 8)
|
||||
if _, err := io.ReadFull(r, buf); err != nil {
|
||||
return h, err
|
||||
}
|
||||
sz := binary.BigEndian.Uint32(buf[0:4])
|
||||
copy(h.Type[:], buf[4:8])
|
||||
if sz == 1 {
|
||||
// 64-bit size follows
|
||||
ext := make([]byte, 8)
|
||||
if _, err := io.ReadFull(r, ext); err != nil {
|
||||
return h, err
|
||||
}
|
||||
h.Size = binary.BigEndian.Uint64(ext)
|
||||
} else {
|
||||
h.Size = uint64(sz)
|
||||
}
|
||||
return h, nil
|
||||
}
|
||||
|
||||
func skipBox(r io.ReadSeeker, boxSize uint64, alreadyRead int64) error {
|
||||
toSkip := int64(boxSize) - alreadyRead
|
||||
if toSkip < 0 {
|
||||
return fmt.Errorf("invalid box size")
|
||||
}
|
||||
_, err := r.Seek(toSkip, io.SeekCurrent)
|
||||
return err
|
||||
}
|
||||
|
||||
func mp4Duration(r io.ReadSeeker) (time.Duration, error) {
|
||||
if _, err := r.Seek(0, io.SeekStart); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
// Find moov box
|
||||
var moovStart int64
|
||||
var moovSize uint64
|
||||
for {
|
||||
pos, _ := r.Seek(0, io.SeekCurrent)
|
||||
h, err := readBoxHeader(r)
|
||||
if err != nil {
|
||||
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
|
||||
break
|
||||
}
|
||||
return 0, err
|
||||
}
|
||||
if string(h.Type[:]) == "moov" {
|
||||
moovStart = pos
|
||||
moovSize = h.Size
|
||||
break
|
||||
}
|
||||
if h.Size < 8 {
|
||||
return 0, errors.New("invalid mp4 box size")
|
||||
}
|
||||
if err := skipBox(r, h.Size, 8); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
if moovStart == 0 && moovSize == 0 {
|
||||
return 0, errors.New("moov not found")
|
||||
}
|
||||
// Parse inside moov for video trak mdhd, else mvhd
|
||||
if _, err := r.Seek(moovStart+8, io.SeekStart); err != nil { // skip moov header
|
||||
return 0, err
|
||||
}
|
||||
end := moovStart + int64(moovSize)
|
||||
var movieTimescale uint32
|
||||
var movieDuration uint64
|
||||
var foundVideoMdhd bool
|
||||
var mdhdTimescale uint32
|
||||
var mdhdDuration uint64
|
||||
|
||||
for {
|
||||
pos, _ := r.Seek(0, io.SeekCurrent)
|
||||
if pos >= end {
|
||||
break
|
||||
}
|
||||
h, err := readBoxHeader(r)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
switch string(h.Type[:]) {
|
||||
case "mvhd":
|
||||
// movie header
|
||||
ver := make([]byte, 1)
|
||||
if _, err := io.ReadFull(r, ver); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if _, err := r.Seek(3, io.SeekCurrent); err != nil { // flags
|
||||
return 0, err
|
||||
}
|
||||
if ver[0] == 1 {
|
||||
// version 1: 64-bit duration
|
||||
buf := make([]byte, 8+8+4+8) // ctime(8) mtime(8) timescale(4) duration(8)
|
||||
if _, err := io.ReadFull(r, buf); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
movieTimescale = binary.BigEndian.Uint32(buf[16:20])
|
||||
movieDuration = binary.BigEndian.Uint64(buf[20:28])
|
||||
} else {
|
||||
buf := make([]byte, 4+4+4+4) // ctime mtime timescale duration
|
||||
if _, err := io.ReadFull(r, buf); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
movieTimescale = binary.BigEndian.Uint32(buf[8:12])
|
||||
movieDuration = uint64(binary.BigEndian.Uint32(buf[12:16]))
|
||||
}
|
||||
// skip rest of mvhd
|
||||
read := int64(1 + 3)
|
||||
if ver[0] == 1 {
|
||||
read += int64(8 + 8 + 4 + 8)
|
||||
} else {
|
||||
read += int64(4 + 4 + 4 + 4)
|
||||
}
|
||||
if err := skipBox(r, h.Size, 8+read); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
case "trak":
|
||||
// parse trak for hdlr and mdhd
|
||||
tEnd := int64(0)
|
||||
if h.Size < 8 {
|
||||
return 0, errors.New("invalid trak size")
|
||||
}
|
||||
tEnd = pos + int64(h.Size)
|
||||
var isVideo bool
|
||||
var tMdhdTimescale uint32
|
||||
var tMdhdDuration uint64
|
||||
for {
|
||||
cpos, _ := r.Seek(0, io.SeekCurrent)
|
||||
if cpos >= tEnd {
|
||||
break
|
||||
}
|
||||
ch, err := readBoxHeader(r)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
switch string(ch.Type[:]) {
|
||||
case "mdia":
|
||||
mEnd := cpos + int64(ch.Size)
|
||||
for {
|
||||
mpos, _ := r.Seek(0, io.SeekCurrent)
|
||||
if mpos >= mEnd {
|
||||
break
|
||||
}
|
||||
mh, err := readBoxHeader(r)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
switch string(mh.Type[:]) {
|
||||
case "hdlr":
|
||||
// skip version+flags (4), pre_defined(4)
|
||||
if _, err := r.Seek(8, io.SeekCurrent); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
handler := make([]byte, 4)
|
||||
if _, err := io.ReadFull(r, handler); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if string(handler) == "vide" {
|
||||
isVideo = true
|
||||
}
|
||||
if err := skipBox(r, mh.Size, 8+8+4); err != nil { // header + skipped + read handler
|
||||
return 0, err
|
||||
}
|
||||
case "mdhd":
|
||||
ver := make([]byte, 1)
|
||||
if _, err := io.ReadFull(r, ver); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if _, err := r.Seek(3, io.SeekCurrent); err != nil { // flags
|
||||
return 0, err
|
||||
}
|
||||
if ver[0] == 1 {
|
||||
buf := make([]byte, 8+8+4+8)
|
||||
if _, err := io.ReadFull(r, buf); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
tMdhdTimescale = binary.BigEndian.Uint32(buf[16:20])
|
||||
tMdhdDuration = binary.BigEndian.Uint64(buf[20:28])
|
||||
} else {
|
||||
buf := make([]byte, 4+4+4+4)
|
||||
if _, err := io.ReadFull(r, buf); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
tMdhdTimescale = binary.BigEndian.Uint32(buf[8:12])
|
||||
tMdhdDuration = uint64(binary.BigEndian.Uint32(buf[12:16]))
|
||||
}
|
||||
if err := skipBox(r, mh.Size, 8+1+3+int64(lenVersionPayload(ver[0]))); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
default:
|
||||
if err := skipBox(r, mh.Size, 8); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
}
|
||||
default:
|
||||
if err := skipBox(r, ch.Size, 8); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
}
|
||||
if isVideo && tMdhdTimescale != 0 && tMdhdDuration != 0 {
|
||||
foundVideoMdhd = true
|
||||
mdhdTimescale = tMdhdTimescale
|
||||
mdhdDuration = tMdhdDuration
|
||||
}
|
||||
// Skip remaining of trak if any
|
||||
if _, err := r.Seek(tEnd, io.SeekStart); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
default:
|
||||
if err := skipBox(r, h.Size, 8); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if foundVideoMdhd && mdhdTimescale != 0 {
|
||||
sec := float64(mdhdDuration) / float64(mdhdTimescale)
|
||||
return time.Duration(sec * float64(time.Second)), nil
|
||||
}
|
||||
if movieTimescale != 0 {
|
||||
sec := float64(movieDuration) / float64(movieTimescale)
|
||||
return time.Duration(sec * float64(time.Second)), nil
|
||||
}
|
||||
return 0, errors.New("failed to read mp4 duration")
|
||||
}
|
||||
|
||||
func lenVersionPayload(ver byte) int {
|
||||
if ver == 1 {
|
||||
return 8 + 8 + 4 + 8
|
||||
}
|
||||
return 4 + 4 + 4 + 4
|
||||
}
|
||||
Reference in New Issue
Block a user