# AI Video Generation from Beginner to Expert: A Full-Stack Hands-On Guide

## 1. The AI Video Generation Landscape

### 1.1 Architecture Overview
```mermaid
graph TB
    A[AI video generation system] --> B[Input layer]
    A --> C[Processing layer]
    A --> D[Output layer]
    B --> B1[Text prompt]
    B --> B2[Image input]
    B --> B3[Audio input]
    B --> B4[Video input]
    C --> C1[Text encoder]
    C --> C2[Diffusion model]
    C --> C3[Motion model]
    C --> C4[Spatio-temporal consistency]
    D --> D1[Video output]
    D --> D2[Audio synthesis]
    D --> D3[Subtitle generation]
    D --> D4[Effects processing]
```
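To make the diagram concrete, here is a minimal sketch of how the three layers might map onto a pipeline configuration object. The class and field names (`PipelineConfig`, `text_encoder`, and so on) are illustrative placeholders for this guide, not the API of any particular platform.

```python
from dataclasses import dataclass, field
from typing import List, Optional

@dataclass
class PipelineConfig:
    """Hypothetical mapping of the three-layer architecture above."""
    # Input layer: at least one modality should be provided
    text_prompt: Optional[str] = None
    image_path: Optional[str] = None
    audio_path: Optional[str] = None
    # Processing layer: model components and their knobs
    text_encoder: str = "clip"          # placeholder encoder name
    diffusion_steps: int = 50
    motion_strength: float = 1.0
    enforce_temporal_consistency: bool = True
    # Output layer: what the system should produce
    outputs: List[str] = field(default_factory=lambda: ["video"])

config = PipelineConfig(
    text_prompt="A butterfly in a forest",
    outputs=["video", "subtitles"],
)
```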
### 1.2 Comparison of Mainstream Platforms

| Platform | Core technology | Max resolution | Length limit | Frame rate | Distinctive features | Cost |
| --- | --- | --- | --- | --- | --- | --- |
| Runway Gen-2 | Diffusion model | 1280x768 | 18 s | 24 fps | Multimodal input, motion control | $15-76/mo |
| Pika 1.0 | Proprietary model | 1024x576 | 3 s (free tier) | 24 fps | Text + image to video | Free-$58/mo |
| Stable Video Diffusion | Open-source model | 1024x576 | 25 frames | 3-30 fps | Fully open source, customizable | Local deployment |
| Kaiber | Multiple models | 1280x720 | Unlimited | 30 fps | Style transfer, music videos | $5-30/mo |
| Luma Dream Machine | Diffusion model | 1280x720 | 5 s | 24 fps | Very fast generation, lifelike motion | Free trial |
| HeyGen | Digital avatars | 1920x1080 | 5 min | 30 fps | Avatar presenters, voice cloning | $24-720/mo |
| Sora (OpenAI) | Diffusion Transformer | 1920x1080+ | 60 s | 30 fps | Physics simulation, long-range temporal consistency | Not publicly available |
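If you want to reason about this table programmatically, the sketch below encodes a subset of it and filters platforms by simple requirements. The `PLATFORMS` data mirrors the table; the `pick_platform` helper and its criteria are illustrative additions for this guide.

```python
from typing import List, Optional

# A subset of the comparison table above, simplified for filtering.
PLATFORMS = {
    "Runway Gen-2": {"max_seconds": 18, "open_source": False},
    "Stable Video Diffusion": {"max_seconds": None, "open_source": True},  # frame-limited, not time-limited
    "HeyGen": {"max_seconds": 300, "open_source": False},
}

def pick_platform(min_seconds: float, need_open_source: bool = False) -> List[str]:
    """Return platforms meeting the (simplified) requirements."""
    matches = []
    for name, spec in PLATFORMS.items():
        if need_open_source and not spec["open_source"]:
            continue
        max_s: Optional[int] = spec["max_seconds"]
        if max_s is not None and max_s < min_seconds:
            continue
        matches.append(name)
    return matches

print(pick_platform(min_seconds=30))  # e.g. ['Stable Video Diffusion', 'HeyGen']
```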
## 2. Getting Started: Up and Running in 5 Minutes

### 2.1 Runway Gen-2 for Absolute Beginners
"""
Runway Gen-2 快速入门脚本
通过API调用生成第一个AI视频
"""
import requests
import json
import time
import os
from pathlib import Path
from dotenv import load_dotenv
class RunwayVideoGenerator:
"""Runway视频生成器"""
def __init__(self, api_key=None):
load_dotenv()
self.api_key = api_key or os.getenv('RUNWAY_API_KEY')
if not self.api_key:
raise ValueError("请设置RUNWAY_API_KEY环境变量或传入api_key参数")
self.base_url = "https://api.runwayml.com/v1"
self.headers = {
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json"
}
self.output_dir = Path("generated_videos")
self.output_dir.mkdir(exist_ok=True)
print(f"✅ Runway视频生成器初始化完成")
print(f"📁 输出目录: {self.output_dir.absolute()}")
    def generate_from_text(self, prompt, **kwargs):
        """Generate a video from a text prompt."""
        print(f"🎬 Generating video: '{prompt[:50]}...'")
        data = {
            "prompt": prompt,
            **kwargs
        }
        defaults = {
            "duration": 4,
            "seed": int(time.time() * 1000) % 1000000,
            "upscale": True,
            "watermark": False,
            "interpolate": True
        }
        data = {**defaults, **data}
        try:
            response = requests.post(
                f"{self.base_url}/video/generate",
                headers=self.headers,
                json=data
            )
            if response.status_code == 200:
                result = response.json()
                video_id = result.get("id")
                print(f"📤 Generation task submitted, task ID: {video_id}")
                video_url = self._poll_generation_status(video_id)
                if video_url:
                    video_path = self._download_video(video_url, prompt)
                    return video_path
                else:
                    print("❌ Video generation failed")
                    return None
            else:
                print(f"❌ API request failed: {response.status_code}")
                print(f"Response: {response.text}")
                return None
        except Exception as e:
            print(f"❌ Error during generation: {e}")
            return None
    def generate_from_image(self, image_path, prompt=None, **kwargs):
        """Generate a video from an image."""
        print(f"🖼️ Generating video from image: {image_path}")
        # Upload the image first; only the Authorization header is sent so
        # requests can set the multipart Content-Type itself.
        with open(image_path, 'rb') as f:
            files = {'image': f}
            upload_response = requests.post(
                f"{self.base_url}/files/upload",
                headers={"Authorization": f"Bearer {self.api_key}"},
                files=files
            )
        if upload_response.status_code != 200:
            print(f"❌ Image upload failed: {upload_response.text}")
            return None
        image_data = upload_response.json()
        image_url = image_data.get("url")
        data = {
            "image_url": image_url,
            **kwargs
        }
        if prompt:
            data["prompt"] = prompt
        response = requests.post(
            f"{self.base_url}/video/generate/image-to-video",
            headers=self.headers,
            json=data
        )
        if response.status_code == 200:
            result = response.json()
            video_id = result.get("id")
            video_url = self._poll_generation_status(video_id)
            if video_url:
                video_path = self._download_video(
                    video_url,
                    prompt or f"image_{Path(image_path).stem}"
                )
                return video_path
        return None
    def _poll_generation_status(self, task_id, max_attempts=60, interval=5):
        """Poll the generation status until it completes, fails, or times out."""
        print("⏳ Waiting for video generation...", end="", flush=True)
        for attempt in range(max_attempts):
            time.sleep(interval)
            status_response = requests.get(
                f"{self.base_url}/tasks/{task_id}",
                headers=self.headers
            )
            if status_response.status_code == 200:
                status_data = status_response.json()
                status = status_data.get("status")
                print(".", end="", flush=True)
                if status == "completed":
                    print("\n✅ Video generation complete!")
                    return status_data.get("output", {}).get("video_url")
                elif status == "failed":
                    print(f"\n❌ Generation failed: {status_data.get('error', 'unknown error')}")
                    return None
            else:
                print(f"\n❌ Status query failed: {status_response.status_code}")
                return None
        print(f"\n❌ Generation timed out ({max_attempts * interval} seconds)")
        return None

    def _download_video(self, video_url, prompt):
        """Download the generated video."""
        try:
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            # Build a filesystem-safe filename from the prompt
            safe_prompt = "".join(c for c in prompt[:50] if c.isalnum() or c in (' ', '-', '_')).rstrip()
            filename = f"{timestamp}_{safe_prompt}.mp4"
            filepath = self.output_dir / filename
            response = requests.get(video_url, stream=True)
            if response.status_code == 200:
                with open(filepath, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)
                print(f"💾 Video saved: {filepath}")
                video_size = filepath.stat().st_size / (1024 * 1024)
                print(f"📊 File size: {video_size:.2f} MB")
                return filepath
            else:
                print(f"❌ Video download failed: {response.status_code}")
                return None
        except Exception as e:
            print(f"❌ Error during download: {e}")
            return None
    def generate_batch(self, prompts, **kwargs):
        """Generate videos in batch."""
        results = []
        for i, prompt in enumerate(prompts, 1):
            print(f"\n{'='*50}")
            print(f"Batch {i}/{len(prompts)}: {prompt}")
            print(f"{'='*50}")
            video_path = self.generate_from_text(prompt, **kwargs)
            results.append({
                "prompt": prompt,
                "video_path": video_path,
                "success": video_path is not None
            })
            if i < len(prompts):
                print("⏸️ Waiting 10 seconds before the next task...")
                time.sleep(10)
        self._generate_batch_report(results)
        return results

    def _generate_batch_report(self, results):
        """Write a Markdown report for the batch run."""
        report_path = self.output_dir / "batch_report.md"
        with open(report_path, 'w', encoding='utf-8') as f:
            f.write("# AI Video Batch Generation Report\n\n")
            f.write(f"Generated at: {time.strftime('%Y-%m-%d %H:%M:%S')}\n\n")
            success_count = sum(1 for r in results if r['success'])
            f.write("## Statistics\n")
            f.write(f"- Total tasks: {len(results)}\n")
            f.write(f"- Succeeded: {success_count}\n")
            f.write(f"- Failed: {len(results) - success_count}\n")
            f.write(f"- Success rate: {success_count/len(results)*100:.1f}%\n\n")
            f.write("## Detailed Results\n\n")
            for result in results:
                status = "✅ success" if result['success'] else "❌ failed"
                f.write(f"### {result['prompt']}\n")
                f.write(f"- Status: {status}\n")
                if result['success']:
                    f.write(f"- File: {result['video_path']}\n")
                f.write("\n")
        print(f"\n📊 Batch report written: {report_path}")
if __name__ == "__main__":
generator = RunwayVideoGenerator()
print("示例1: 文本到视频生成")
prompt = "A beautiful butterfly flying through a magical forest, cinematic lighting, 4K"
video_path = generator.generate_from_text(prompt)
if video_path:
print(f"🎉 视频生成成功!文件: {video_path}")
print("\n示例2: 图像到视频生成")
image_path = "input_image.jpg"
if Path(image_path).exists():
video_path = generator.generate_from_image(
image_path,
prompt="The image comes to life with gentle movement"
)
print("\n示例3: 批量生成")
prompts = [
"Sunrise over mountains, time-lapse, cinematic",
"Underwater coral reef with tropical fish, beautiful lighting",
"Cyberpunk city at night with flying cars, neon lights"
]
results = generator.generate_batch(prompts, duration=3)
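`_poll_generation_status` above uses a fixed five-second interval, which is fine for short jobs. For longer queues a gentler pattern is exponential backoff. A minimal, API-agnostic sketch (the `poll_with_backoff` helper and its parameters are my own illustration, not part of the Runway client):

```python
import time

def poll_with_backoff(check_fn, max_wait=300, base=2.0, cap=30.0):
    """Call check_fn() until it returns a non-None result, sleeping
    base, base*2, base*4, ... seconds between attempts (capped at `cap`)."""
    waited, delay = 0.0, base
    while waited < max_wait:
        result = check_fn()
        if result is not None:
            return result
        time.sleep(delay)
        waited += delay
        delay = min(delay * 2, cap)
    return None  # timed out
```

Here `check_fn` would wrap a single status request and return the video URL once the task reports `completed`, so the polling policy stays separate from the API call.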
### 2.2 Environment Setup and Obtaining API Keys

#### Step 1: Obtain API keys

**1. Runway API**

1. Visit https://runwayml.com
2. Register an account and log in
3. Go to Account Settings → API Keys
4. Click "Create New API Key"
5. Copy the API key

**2. Pika API (if needed)**

1. Visit https://pika.art
2. Join the Discord and wait for an invite
3. Request API access

#### Step 2: Set up the local environment

```bash
# 1. Install Python 3.8+
python --version

# 2. Create a virtual environment
python -m venv ai-video-env

# 3. Activate the virtual environment
# Windows:
ai-video-env\Scripts\activate
# Mac/Linux:
source ai-video-env/bin/activate

# 4. Install dependencies
pip install requests python-dotenv pillow opencv-python

# 5. Create the environment variable file
cat > .env << EOF
RUNWAY_API_KEY=your_runway_api_key_here
PIKA_API_KEY=your_pika_api_key_here
OPENAI_API_KEY=your_openai_api_key_for_sora
EOF

# 6. Create the project structure
mkdir -p generated_videos/raw
mkdir -p generated_videos/processed
mkdir -p input_images
mkdir -p scripts
```
#### Step 3: Test the environment

```python
import os
from dotenv import load_dotenv

def test_environment():
    load_dotenv()
    print("🔍 Starting environment test")
    print("=" * 40)

    # Check that the API keys are set and plausibly valid
    apis = ['RUNWAY_API_KEY', 'PIKA_API_KEY']
    for api in apis:
        key = os.getenv(api)
        if key and len(key) > 20:
            print(f"✅ {api}: set")
        elif key:
            print(f"⚠️ {api}: possibly invalid (too short)")
        else:
            print(f"❌ {api}: not set")

    # Check that the project directories exist
    directories = ['generated_videos', 'input_images', 'scripts']
    for dir_name in directories:
        if os.path.exists(dir_name):
            print(f"✅ Directory '{dir_name}': exists")
        else:
            print(f"❌ Directory '{dir_name}': missing")

    print("=" * 40)
    print("Environment test complete")

if __name__ == "__main__":
    test_environment()
```
#### Step 4: Quick verification

Run the quick-start script to make sure everything works:

```bash
python runway_quickstart.py
```
## 3. Advanced: Prompt Engineering and Parameter Optimization

### 3.1 Advanced Prompt-Writing Techniques
```python
# prompt_engineering.py
"""
Prompt engineering for AI video generation.
"""

class VideoPromptEngineer:
    """Video prompt engineer."""

    def __init__(self):
        self.styles = {
            "cinematic": [
                "cinematic shot", "film grain", "shallow depth of field",
                "anamorphic lens flare", "color grading", "35mm film"
            ],
            "anime": [
                "anime style", "Japanese animation", "Studio Ghibli",
                "Makoto Shinkai", "sharp lines", "vibrant colors"
            ],
            "cyberpunk": [
                "cyberpunk 2077", "neon lights", "rainy night",
                "futuristic city", "holographic displays", "synthwave"
            ],
            "documentary": [
                "documentary style", "handheld camera", "natural lighting",
                "interview style", "b-roll footage", "voice over"
            ],
            "fantasy": [
                "fantasy art", "magical realism", "ethereal glow",
                "mythical creatures", "enchanted forest", "dreamlike"
            ]
        }
        self.camera_movements = {
            "static": ["static shot", "locked camera", "tripod"],
            "pan": ["dolly shot", "tracking shot", "camera pan left/right"],
            "push_in": ["zoom in", "push in", "dolly zoom"],
            "crane": ["crane shot", "helicopter shot", "bird's eye view"],
            "handheld": ["handheld camera", "shaky cam", "documentary style"],
            "orbit": ["orbit shot", "360 shot", "circular movement"]
        }
        self.lighting_types = {
            "natural": ["natural lighting", "golden hour", "sunlight"],
            "dramatic": ["dramatic lighting", "chiaroscuro", "high contrast"],
            "cinematic": ["cinematic lighting", "moody", "rim light"],
            "neon": ["neon lighting", "cyberpunk lights", "colorful"],
            "soft": ["soft lighting", "diffused", "even lighting"]
        }
    def construct_prompt(self, subject, action, style="cinematic",
                         camera="dolly shot", lighting="cinematic",
                         quality="4K", extra_details=None):
        """Build a structured prompt."""
        # Base structure
        prompt_parts = [
            subject,
            action
        ]
        # Add style keywords
        if style in self.styles:
            prompt_parts.extend(self.styles[style][:2])
        # Add camera movement: expand a known preset key, otherwise use the value as-is
        if camera in self.camera_movements:
            prompt_parts.extend(self.camera_movements[camera][:2])
        else:
            prompt_parts.append(camera)
        # Add lighting: same preset-or-literal fallback
        if lighting in self.lighting_types:
            prompt_parts.extend(self.lighting_types[lighting][:2])
        else:
            prompt_parts.append(lighting)
        # Quality descriptors
        prompt_parts.append(quality)
        prompt_parts.append("high quality")
        # Extra details
        if extra_details:
            if isinstance(extra_details, list):
                prompt_parts.extend(extra_details)
            else:
                prompt_parts.append(extra_details)
        # Assemble the final prompt
        prompt = ", ".join(prompt_parts)
        return prompt
    def generate_scene_breakdown(self, main_prompt):
        """Generate a scene breakdown for a prompt."""
        breakdown = {
            "main_prompt": main_prompt,
            "suggested_variations": [],
            "technical_specs": {},
            "style_references": []
        }
        # Analyze the prompt and generate variations
        variations = self._generate_variations(main_prompt)
        breakdown["suggested_variations"] = variations[:3]
        # Suggest technical specs
        breakdown["technical_specs"] = self._suggest_technical_specs(main_prompt)
        # Provide style references
        breakdown["style_references"] = self._suggest_style_references(main_prompt)
        return breakdown

    def _generate_variations(self, prompt):
        """Generate prompt variations."""
        variations = []
        # Variation 1: different styles
        for style_name in list(self.styles.keys())[:3]:
            style_desc = self.styles[style_name][0]
            variations.append(f"{prompt}, {style_desc} style")
        # Variation 2: different camera angles
        camera_options = ["low angle shot", "high angle shot", "Dutch angle"]
        for camera in camera_options:
            variations.append(f"{prompt}, {camera}")
        # Variation 3: different times of day / weather
        times = ["at sunrise", "at golden hour", "at night", "in the rain"]
        for time_of_day in times[:2]:
            variations.append(f"{prompt}, {time_of_day}")
        return variations

    def _suggest_technical_specs(self, prompt):
        """Suggest technical specs."""
        specs = {}
        # Estimate a duration from the prompt
        if any(word in prompt.lower() for word in ["slow motion", "timelapse"]):
            specs["duration"] = "8-12 seconds"
        elif any(word in prompt.lower() for word in ["action", "fast", "running"]):
            specs["duration"] = "4-6 seconds"
        else:
            specs["duration"] = "6-8 seconds"
        # Suggest a frame rate
        if "slow motion" in prompt.lower():
            specs["frame_rate"] = "60fps (for slow motion)"
        else:
            specs["frame_rate"] = "24fps (cinematic)"
        # Suggest an aspect ratio
        if any(word in prompt.lower() for word in ["cinematic", "movie", "film"]):
            specs["aspect_ratio"] = "2.35:1 (cinemascope)"
        else:
            specs["aspect_ratio"] = "16:9 (standard)"
        return specs

    def _suggest_style_references(self, prompt):
        """Suggest style references."""
        references = []
        prompt_lower = prompt.lower()
        if any(word in prompt_lower for word in ["cyberpunk", "neon", "futuristic"]):
            references.append("Blade Runner 2049 (2017)")
            references.append("Cyberpunk 2077 (game)")
        if any(word in prompt_lower for word in ["fantasy", "magical", "enchanted"]):
            references.append("Lord of the Rings (movie)")
            references.append("Studio Ghibli films")
        if any(word in prompt_lower for word in ["anime", "japanese"]):
            references.append("Your Name (2016)")
            references.append("Spirited Away (2001)")
        if any(word in prompt_lower for word in ["documentary", "realistic"]):
            references.append("BBC Planet Earth")
            references.append("National Geographic")
        return references
    def create_storyboard(self, scene_descriptions, transition="cut"):
        """Create a storyboard."""
        storyboard = {
            "title": "AI Generated Video Storyboard",
            "scenes": [],
            "transitions": transition,
            "total_duration": 0
        }
        for i, description in enumerate(scene_descriptions, 1):
            scene = {
                "scene_number": i,
                "description": description,
                "duration": self._estimate_scene_duration(description),
                "shot_type": self._suggest_shot_type(description),
                "camera_movement": self._suggest_camera_movement(description),
                "lighting": self._suggest_lighting(description)
            }
            storyboard["scenes"].append(scene)
            storyboard["total_duration"] += scene["duration"]
        return storyboard

    def _estimate_scene_duration(self, description):
        """Estimate the scene duration in seconds."""
        word_count = len(description.split())
        if word_count < 10:
            return 3
        elif word_count < 20:
            return 5
        else:
            return 8

    def _suggest_shot_type(self, description):
        """Suggest a shot type."""
        if "close up" in description.lower():
            return "Close-up"
        elif "wide shot" in description.lower() or "landscape" in description.lower():
            return "Wide shot"
        elif "medium shot" in description.lower():
            return "Medium shot"
        else:
            return "Medium close-up"

    def _suggest_camera_movement(self, description):
        """Suggest a camera movement."""
        desc_lower = description.lower()
        if any(word in desc_lower for word in ["flying", "floating", "orbiting"]):
            return "Orbit shot"
        elif any(word in desc_lower for word in ["walking", "running", "moving"]):
            return "Tracking shot"
        elif "zoom" in desc_lower:
            return "Zoom"
        else:
            return "Static shot"

    def _suggest_lighting(self, description):
        """Suggest lighting."""
        desc_lower = description.lower()
        if any(word in desc_lower for word in ["sunset", "sunrise", "golden"]):
            return "Golden hour lighting"
        elif any(word in desc_lower for word in ["night", "dark", "moonlight"]):
            return "Low-key lighting"
        elif any(word in desc_lower for word in ["bright", "sunny", "day"]):
            return "High-key lighting"
        else:
            return "Natural lighting"
# Usage examples
if __name__ == "__main__":
    engineer = VideoPromptEngineer()

    # Example 1: build a structured prompt
    print("Example 1: structured prompt construction")
    prompt = engineer.construct_prompt(
        subject="A lone astronaut",
        action="floating in space near a colorful nebula",
        style="cinematic",
        camera="slow dolly zoom",
        lighting="dramatic",
        quality="8K",
        extra_details=["stars twinkling", "reflection in visor"]
    )
    print(f"Generated prompt:\n{prompt}\n")

    # Example 2: scene breakdown
    print("Example 2: prompt breakdown analysis")
    breakdown = engineer.generate_scene_breakdown(prompt)
    print("Prompt analysis:")
    print(f"Original prompt: {breakdown['main_prompt']}")
    print("\nSuggested variations:")
    for i, variation in enumerate(breakdown['suggested_variations'], 1):
        print(f"{i}. {variation}")
    print("\nSuggested technical specs:")
    for key, value in breakdown['technical_specs'].items():
        print(f"{key}: {value}")
    print("\nStyle references:")
    for reference in breakdown['style_references']:
        print(f"- {reference}")

    # Example 3: create a storyboard
    print("\nExample 3: storyboard generation")
    scenes = [
        "A spaceship approaching a distant planet",
        "Close-up of astronaut's face inside the helmet",
        "Wide shot of the colorful nebula in space",
        "Astronaut reaching out towards the camera"
    ]
    storyboard = engineer.create_storyboard(scenes, transition="fade")
    print(f"Storyboard title: {storyboard['title']}")
    print(f"Total duration: {storyboard['total_duration']} seconds")
    print(f"Transition: {storyboard['transitions']}")
    print("\nScene details:")
    for scene in storyboard['scenes']:
        print(f"\nScene {scene['scene_number']}:")
        print(f"  Description: {scene['description']}")
        print(f"  Duration: {scene['duration']} seconds")
        print(f"  Shot: {scene['shot_type']}")
        print(f"  Movement: {scene['camera_movement']}")
        print(f"  Lighting: {scene['lighting']}")
```
### 3.2 Parameter Optimization Strategies

```python
"""
Parameter optimization for AI video generation.
"""
import json
from dataclasses import dataclass
from typing import Dict, List, Optional
from enum import Enum


class ModelType(Enum):
    """Model type enum."""
    RUNWAY_GEN2 = "runway_gen2"
    STABLE_VIDEO = "stable_video"
    PIKA = "pika"
    LUMA = "luma"


class StylePreset(Enum):
    """Style presets."""
    CINEMATIC = "cinematic"
    ANIMATED = "animated"
    REALISTIC = "realistic"
    PAINTERLY = "painterly"
    SKETCH = "sketch"


@dataclass
class VideoGenerationParams:
    """Video generation parameters."""
    prompt: str
    model_type: ModelType = ModelType.RUNWAY_GEN2
    duration_seconds: float = 4.0
    fps: int = 24
    seed: Optional[int] = None
    cfg_scale: float = 7.5
    steps: int = 50
    style_preset: Optional[StylePreset] = None
    negative_prompt: str = ""
    motion_scale: float = 1.0
    camera_control: Optional[Dict] = None

    def __post_init__(self):
        if self.camera_control is None:
            self.camera_control = {
                "pan": 0.0,
                "tilt": 0.0,
                "zoom": 0.0,
                "rotation": 0.0
            }
class ParameterOptimizer:
    """Parameter optimizer."""

    def __init__(self):
        self.parameter_ranges = {
            "duration_seconds": {
                "min": 2.0,
                "max": 18.0,
                "step": 2.0,
                "description": "Video duration in seconds"
            },
            "cfg_scale": {
                "min": 3.0,
                "max": 15.0,
                "step": 1.0,
                "description": "Prompt adherence; higher values follow the prompt more closely"
            },
            "steps": {
                "min": 20,
                "max": 100,
                "step": 10,
                "description": "Diffusion steps; more steps improve quality but are slower"
            },
            "motion_scale": {
                "min": 0.1,
                "max": 2.0,
                "step": 0.2,
                "description": "Motion intensity"
            }
        }
        self.style_presets = {
            StylePreset.CINEMATIC: {
                "cfg_scale": 8.0,
                "steps": 60,
                "negative_prompt": "blurry, low quality, distorted, ugly",
                "description": "Cinematic look"
            },
            StylePreset.ANIMATED: {
                "cfg_scale": 7.0,
                "steps": 40,
                "negative_prompt": "realistic, photo, photograph",
                "description": "Animated look"
            },
            StylePreset.REALISTIC: {
                "cfg_scale": 9.0,
                "steps": 80,
                "negative_prompt": "cartoon, anime, painting, drawing",
                "description": "Photorealistic look"
            },
            StylePreset.PAINTERLY: {
                "cfg_scale": 6.5,
                "steps": 50,
                "negative_prompt": "photorealistic, sharp, detailed",
                "description": "Painterly look"
            }
        }
    def optimize_for_scene(self, scene_description: str) -> VideoGenerationParams:
        """Optimize parameters based on a scene description."""
        scene_type = self._analyze_scene_type(scene_description)
        params = VideoGenerationParams(
            prompt=scene_description,
            model_type=ModelType.RUNWAY_GEN2
        )
        if scene_type == "action":
            params.duration_seconds = 3.0
            params.fps = 30
            params.motion_scale = 1.5
            params.style_preset = StylePreset.CINEMATIC
        elif scene_type == "landscape":
            params.duration_seconds = 8.0
            params.fps = 24
            params.motion_scale = 0.3
            params.style_preset = StylePreset.REALISTIC
        elif scene_type == "portrait":
            params.duration_seconds = 4.0
            params.fps = 24
            params.motion_scale = 0.1
            params.style_preset = StylePreset.CINEMATIC
        elif scene_type == "abstract":
            params.duration_seconds = 6.0
            params.fps = 24
            params.motion_scale = 0.8
            params.style_preset = StylePreset.PAINTERLY
        if params.style_preset:
            preset = self.style_presets[params.style_preset]
            params.cfg_scale = preset["cfg_scale"]
            params.steps = preset["steps"]
            params.negative_prompt = preset["negative_prompt"]
        return params

    def _analyze_scene_type(self, description: str) -> str:
        """Classify the scene type from keywords."""
        desc_lower = description.lower()
        action_keywords = ["running", "fighting", "exploding", "fast", "action"]
        landscape_keywords = ["mountain", "ocean", "forest", "landscape", "view"]
        portrait_keywords = ["face", "person", "portrait", "close-up", "character"]
        abstract_keywords = ["abstract", "pattern", "flow", "shape", "color"]
        if any(word in desc_lower for word in action_keywords):
            return "action"
        elif any(word in desc_lower for word in landscape_keywords):
            return "landscape"
        elif any(word in desc_lower for word in portrait_keywords):
            return "portrait"
        elif any(word in desc_lower for word in abstract_keywords):
            return "abstract"
        else:
            return "general"
    def generate_parameter_grid(self, base_params: VideoGenerationParams,
                                variables: List[str]) -> List[VideoGenerationParams]:
        """Generate a parameter grid for A/B testing."""
        variations = []
        for var in variables:
            if var in self.parameter_ranges:
                range_info = self.parameter_ranges[var]
                current_value = getattr(base_params, var)
                # Step one increment below and above the current value, clamped to the range
                low = max(range_info["min"], current_value - range_info["step"])
                high = min(range_info["max"], current_value + range_info["step"])
                for value in [low, current_value, high]:
                    if value != current_value:
                        new_params = self._clone_params(base_params)
                        setattr(new_params, var, value)
                        variations.append(new_params)
        return variations

    def _clone_params(self, params: VideoGenerationParams) -> VideoGenerationParams:
        """Clone a parameter object."""
        return VideoGenerationParams(
            prompt=params.prompt,
            model_type=params.model_type,
            duration_seconds=params.duration_seconds,
            fps=params.fps,
            seed=params.seed,
            cfg_scale=params.cfg_scale,
            steps=params.steps,
            style_preset=params.style_preset,
            negative_prompt=params.negative_prompt,
            motion_scale=params.motion_scale,
            camera_control=params.camera_control.copy()
        )

    def create_camera_animation(self, scene_duration: float,
                                animation_type: str = "gentle_pan") -> Dict:
        """Create camera animation parameters."""
        animations = {
            "gentle_pan": {
                "pan": {"start": -0.1, "end": 0.1},
                "tilt": {"start": 0.0, "end": 0.0},
                "zoom": {"start": 1.0, "end": 1.0},
                "rotation": {"start": 0.0, "end": 0.0}
            },
            "slow_zoom_in": {
                "pan": {"start": 0.0, "end": 0.0},
                "tilt": {"start": 0.0, "end": 0.0},
                "zoom": {"start": 1.0, "end": 1.3},
                "rotation": {"start": 0.0, "end": 0.0}
            },
            "orbit_shot": {
                "pan": {"start": -0.3, "end": 0.3},
                "tilt": {"start": -0.1, "end": 0.1},