87 lines
2.7 KiB
Python
87 lines
2.7 KiB
Python
|
|
#!/usr/bin/env python3
|
||
|
|
"""
|
||
|
|
Generate voice with custom Kimi-XXX filename using local Kokoro TTS
|
||
|
|
Usage: generate_voice.py "Text to speak" [--voice af_bella] [--output-dir /tmp] [--model tts-1] [--speed 1.3]
|
||
|
|
"""
|
||
|
|
|
||
|
|
import argparse
|
||
|
|
import json
|
||
|
|
import os
|
||
|
|
import sys
|
||
|
|
import tempfile
|
||
|
|
import urllib.request
|
||
|
|
from datetime import datetime
|
||
|
|
|
||
|
|
def generate_voice(
    text,
    voice="af_bella",
    output_dir="/tmp",
    model="tts-1",
    speed=1.3,
    tts_url="http://10.0.0.228:8880/v1/audio/speech",
    timeout=120.0,
):
    """Synthesize *text* through the TTS endpoint and save it as a Kimi-XXX file.

    Sends an HTTP POST with a JSON body (``model``, ``input``, ``voice``,
    ``speed``) to ``tts_url`` and writes the raw response body to
    ``<output_dir>/Kimi-YYYYMMDD-HHMMSS.ogg``.

    Args:
        text: Text to convert to speech.
        voice: Voice ID sent to the endpoint.
        output_dir: Directory for the audio file; created if missing.
        model: Model name sent to the endpoint.
        speed: Speech speed multiplier; must be greater than zero.
        tts_url: URL of the speech endpoint.
        timeout: Seconds to wait on the request before giving up, so an
            unreachable host cannot hang the caller indefinitely.

    Returns:
        A dict with ``filepath``, ``filename``, ``size_bytes``,
        ``estimated_duration_seconds``, ``voice``, ``speed`` and ``text``.
        The same dict is printed to stdout as one JSON line.

    Raises:
        SystemExit: With exit code 1 on any failure, after a JSON object
            containing an ``error`` key has been printed to stderr.
    """
    # The timestamp has one-second resolution, so two calls within the same
    # second that target the same directory share a filename.
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    filename = f"Kimi-{timestamp}.ogg"
    filepath = os.path.join(output_dir, filename)

    data = json.dumps({
        "model": model,
        "input": text,
        "voice": voice,
        "speed": speed,
    }).encode()

    req = urllib.request.Request(
        tts_url,
        data=data,
        headers={"Content-Type": "application/json"},
    )

    try:
        # Reject unusable speeds before any I/O: the duration estimate below
        # divides by speed, and failing there would leave a file behind.
        # `not speed > 0` also rejects NaN.
        if not speed > 0:
            raise ValueError(f"speed must be a positive number, got {speed!r}")

        with urllib.request.urlopen(req, timeout=timeout) as response:
            audio_data = response.read()

        # An empty body would otherwise be saved as a useless 0-byte file.
        if not audio_data:
            raise ValueError("TTS endpoint returned an empty response")

        # Create the target directory on demand rather than failing after
        # the synthesis request has already completed.
        os.makedirs(output_dir, exist_ok=True)
        with open(filepath, "wb") as f:
            f.write(audio_data)

        # Rough estimate: 150 characters per minute at normal speed, scaled
        # by the speed multiplier, with a floor of one second.
        # NOTE(review): 150 chars/min looks low (typical speech is closer to
        # 150 words/min) - confirm before relying on this value.
        estimated_duration = max(1, len(text) / 150 * 60 / speed)

        result = {
            "filepath": filepath,
            "filename": filename,
            "size_bytes": len(audio_data),
            "estimated_duration_seconds": round(estimated_duration, 1),
            "voice": voice,
            "speed": speed,
            "text": text,
        }

        print(json.dumps(result))
        return result

    except Exception as e:
        # Boundary handler: report every failure as JSON on stderr and exit
        # non-zero so calling scripts can detect it.
        error_result = {
            "error": str(e),
            "filepath": None,
            "filename": None,
        }
        print(json.dumps(error_result), file=sys.stderr)
        sys.exit(1)
|
||
|
|
|
||
|
|
if __name__ == "__main__":
    # Command-line entry point: read the options, then synthesize one clip.
    cli = argparse.ArgumentParser(
        description="Generate voice with Kimi-XXX filename"
    )
    cli.add_argument("text", help="Text to convert to speech")

    # Optional flags, registered in the order they appear in --help.
    option_specs = (
        ("--voice", {
            "default": "af_bella",
            "help": "Voice ID (default: af_bella)",
        }),
        ("--output-dir", {
            "default": "/tmp",
            "help": "Output directory (default: /tmp)",
        }),
        ("--model", {
            "default": "tts-1",
            "help": "TTS model (default: tts-1)",
        }),
        ("--speed", {
            "type": float,
            "default": 1.3,
            "help": "Speech speed multiplier (default: 1.3)",
        }),
    )
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)

    opts = cli.parse_args()

    generate_voice(
        text=opts.text,
        voice=opts.voice,
        output_dir=opts.output_dir,
        model=opts.model,
        speed=opts.speed,
    )
|