fix: 优化点1,2,3,6

2026-04-02 10:28:20 +08:00 · 2026-04-02 10:28:20 +08:00 · c78fc762f4
parent 93ed944c14
commit c78fc762f4
2 changed files with 754 additions and 185 deletions
--- a/portal-ui/src/components/VideoComposeCard.vue
+++ b/portal-ui/src/components/VideoComposeCard.vue
@ -1,5 +1,11 @@
 <template>
-	<div class="vg-compose-card">
+	<div
+		class="vg-compose-card"
+		:class="{ 'vg-compose-card--drag': dragOver }"
+		@paste.capture="onComposePaste"
+		@dragover.prevent="onDragOver"
+		@dragleave="onDragLeave"
+		@drop.prevent="onDrop">
 		<!-- 左侧面板 - 根据模式动态渲染 -->
 		<div class="vg-compose-left" v-if="!isTextToVideo">
 			<!-- 首帧模式 -->
@ -57,13 +63,13 @@
 						参考图（{{ mediaList.length }}/{{ maxMediaCount }}）
 					</div>
 					<mf-button class="vg-compose-left-upload" size="small" type="primary" @click="openFilePicker">
-						添加图片
+						添加素材
 					</mf-button>
 				</div>

 				<div v-if="mediaList.length === 0" class="vg-compose-empty" @click="openFilePicker">
 					<div class="vg-compose-empty-icon" aria-hidden="true">+</div>
-					<div class="vg-compose-empty-text">点击添加参考图</div>
+					<div class="vg-compose-empty-text">点击、粘贴或拖入图片 / 视频 / 音频</div>
 				</div>

 				<div v-else class="vg-compose-media-scroll">
@ -73,7 +79,14 @@
 						class="vg-compose-media-item"
 						:title="item.name || ''">
 						<div class="vg-compose-media-preview">
-							<img :src="item.url" alt="" />
+							<img v-if="item.mediaType === 'image'" :src="item.url" alt="" />
+							<video
+								v-else-if="item.mediaType === 'video'"
+								:src="item.url"
+								muted
+								playsinline
+								preload="metadata" />
+							<div v-else-if="item.mediaType === 'audio'" class="vg-compose-audio-badge">♪</div>
 						</div>
 						<button
 							type="button"
@ -115,9 +128,18 @@
 						:key="item.key"
 						:class="['vg-mention-item', { active: idx === mentionActiveIndex }]"
 						@mousedown.prevent="selectMentionItem(item)">
-						<img :src="item.url" alt="" class="vg-mention-thumb" />
+						<img v-if="item.mediaType === 'image'" :src="item.url" alt="" class="vg-mention-thumb" />
+						<video
+							v-else-if="item.mediaType === 'video'"
+							:src="item.url"
+							class="vg-mention-thumb vg-mention-thumb-video"
+							muted
+							playsinline
+							preload="metadata" />
+						<div v-else class="vg-mention-thumb vg-mention-thumb-audio">♪</div>
+						<span class="vg-mention-kind">{{ mentionKindLabel(item.mediaType) }}</span>
 					</div>
-					<div v-if="mentionCandidates.length === 0" class="vg-mention-empty">暂无可引用参考图</div>
+					<div v-if="mentionCandidates.length === 0" class="vg-mention-empty">暂无可引用素材（请先上传）</div>
 				</div>
 			</div>

@ -174,6 +196,7 @@ const emit = defineEmits(['update:modelValue', 'update:mediaList'])

 const fileInputRef = ref(null)
 const editorRef = ref(null)
+const dragOver = ref(false)
 const localPrompt = ref(props.modelValue || '')
 const internalMediaList = ref(Array.isArray(props.mediaList) ? [...props.mediaList] : [])
 const currentUploadIndex = ref(-1) // for first/last frame mode
@ -224,14 +247,22 @@ const mentionCandidates = computed(() =>
 	(mediaList.value || [])
 		.filter((i) => {
 			const u = String(i?.url || '').trim()
-			return i?.mediaType === 'image' && !i?.isUploading && /^https?:\/\//i.test(u)
+			const mt = i?.mediaType
+			return ['image', 'video', 'audio'].includes(mt) && !i?.isUploading && /^https?:\/\//i.test(u)
 		})
 		.map((i, idx) => ({
 			key: i.id || i.url || String(idx),
-			url: i.url
+			url: i.url,
+			mediaType: i.mediaType || 'image'
 		}))
 )

+/** Short label shown next to a mention candidate; every type other than video/audio is labelled as an image. */
+const mentionKindLabel = (mt) => {
+	switch (mt) {
+		case 'video':
+			return '视频'
+		case 'audio':
+			return '音频'
+		default:
+			return '图'
+	}
+}
+
 const isTextToVideo = computed(() => props.videoMode === 'text-to-video')
 const isFirstFrame = computed(() => props.videoMode === 'image-first-frame')
 const isFirstLastFrame = computed(() => props.videoMode === 'image-first-last-frame')
@ -259,19 +290,22 @@ const openFilePickerFor = (index) => {

 	const acceptAttr = computed(() => {
 		const types = new Set(props.allowedMediaTypes || [])
-		if (types.has('image') && types.has('video')) return 'image/*,video/*'
-		if (types.has('image')) return 'image/*'
-		if (types.has('video')) return 'video/*'
-		return 'image/*,video/*'
+		const parts = []
+		if (types.has('image')) parts.push('image/*')
+		if (types.has('video')) parts.push('video/*')
+		if (types.has('audio')) parts.push('audio/*')
+		return parts.length ? parts.join(',') : 'image/*,video/*,audio/*'
 	})

 const detectMediaType = (file) => {
 	const mime = (file?.type || '').toLowerCase()
 	if (mime.startsWith('image/')) return 'image'
 	if (mime.startsWith('video/')) return 'video'
+	if (mime.startsWith('audio/')) return 'audio'
 	const lowerName = (file?.name || '').toLowerCase()
 	if (/\.(png|jpe?g|gif|webp|bmp|svg)$/.test(lowerName)) return 'image'
-	if (/\.(mp4|mov|webm|m4v|avi|mkv|ogg)$/.test(lowerName)) return 'video'
+	if (/\.(mp4|mov|webm|m4v|avi|mkv)$/.test(lowerName)) return 'video'
+	if (/\.(mp3|wav|m4a|aac|ogg|flac)$/.test(lowerName)) return 'audio'
 	return 'image'
 }

@ -313,107 +347,156 @@ const clearAll = () => {
 	mentionVisible.value = false
 }

-	const handleSelectFiles = async (event) => {
-	const input = event.target
-	const files = Array.from(input.files || [])
-	if (!files.length) return
+	const ingestFileList = async (files) => {
+		if (!files || !files.length) return
+		const fileArr = Array.from(files)

-	const isReferenceMode = isReference.value
-	let targetList = [...mediaList.value]
+		if (isFirstLastFrame.value && currentUploadIndex.value < 0) {
+			if (!mediaList.value[0]) currentUploadIndex.value = 0
+			else if (!mediaList.value[1]) currentUploadIndex.value = 1
+		}

-	// 首尾帧模式特殊处理索引
-	if (isFirstLastFrame.value && currentUploadIndex.value >= 0) {
-		const idx = currentUploadIndex.value
-		if (files.length > 0) {
-			const file = files[0]
-			const mediaType = detectMediaType(file)
-			if (mediaType !== 'image') {
-				Message.warning('首尾帧仅支持图片')
-				input.value = ''
+		const isReferenceMode = isReference.value
+		let targetList = [...mediaList.value]
+
+		if (isFirstLastFrame.value && currentUploadIndex.value >= 0) {
+			const idx = currentUploadIndex.value
+			if (fileArr.length > 0) {
+				const file = fileArr[0]
+				const mediaType = detectMediaType(file)
+				if (mediaType !== 'image') {
+					Message.warning('首尾帧仅支持图片')
+					return
+				}
+				const id = `frame_${Date.now()}`
+				const localPreview = URL.createObjectURL(file)
+				const entry = {
+					id,
+					url: localPreview,
+					mediaType: 'image',
+					name: file.name,
+					_fileRef: file,
+					isUploading: true,
+					label: idx === 0 ? '[首帧]' : '[尾帧]'
+				}
+				targetList[idx] = entry
+			}
+		} else {
+			const remain = props.maxMediaCount - mediaList.value.length
+			if (remain <= 0 && !isReferenceMode) {
+				Message.warning(`最多添加 ${props.maxMediaCount} 个参考素材`)
 				return
 			}
-			const id = `frame_${Date.now()}`
-			const localPreview = URL.createObjectURL(file)
-			const entry = { id, url: localPreview, mediaType: 'image', name: file.name, _fileRef: file, isUploading: true, label: idx === 0 ? '[首帧]' : '[尾帧]' }
-			targetList[idx] = entry
-		}
-	} else {
-		// 其他模式正常处理
-		const remain = props.maxMediaCount - mediaList.value.length
-		if (remain <= 0 && !isReferenceMode) {
-			Message.warning(`最多添加 ${props.maxMediaCount} 个参考素材`)
-			input.value = ''
-			return
-		}

-		const selected = files.slice(0, remain || files.length)
-		const uploadingEntries = []
+			const selected = fileArr.slice(0, remain || fileArr.length)
+			const uploadingEntries = []

-		for (const file of selected) {
-			const mediaType = detectMediaType(file)
-			if (!props.allowedMediaTypes.includes(mediaType)) {
-				Message.warning(`当前模式不支持该类型：${mediaType}`)
-				continue
+			for (const file of selected) {
+				const mediaType = detectMediaType(file)
+				if (!props.allowedMediaTypes.includes(mediaType)) {
+					Message.warning(`当前模式不支持该类型：${mediaType}`)
+					continue
+				}
+
+				const id = `tmp_${Date.now()}_${Math.random().toString(16).slice(2)}`
+				const localPreview = URL.createObjectURL(file)
+
+				const entry = {
+					id,
+					url: localPreview,
+					mediaType,
+					name: file.name,
+					_fileRef: file,
+					isUploading: true
+				}
+				uploadingEntries.push(entry)
 			}

-			const id = `tmp_${Date.now()}_${Math.random().toString(16).slice(2)}`
-			const localPreview = URL.createObjectURL(file)
-
-			const entry = {
-				id,
-				url: localPreview,
-				mediaType,
-				name: file.name,
-				_fileRef: file,
-				isUploading: true
+			if (uploadingEntries.length) {
+				targetList = [...targetList, ...uploadingEntries]
 			}
-			uploadingEntries.push(entry)
 		}

-		if (uploadingEntries.length) {
-			targetList = [...targetList, ...uploadingEntries]
-		}
-	}
-
-	setMediaList(targetList)
-	await nextTick()
-
-	// 上传处理
-	const toUpload = targetList.filter(item => item.isUploading)
-	for (const entry of toUpload) {
-		try {
-			const res = await uploadFile({
-				url: PORTAL_TENCENT_COS_UPLOAD_URL,
-				file: entry._fileRef,
-				name: 'file'
-			})
-			const url = extractUploadUrlFromResponse(res)
-			if (!url) throw new Error(res?.msg || '未返回文件地址')
-
-			const localPreview = entry.url
-			setMediaList(
-				mediaList.value.map((x) =>
-					normalizeItemKey(x) === normalizeItemKey(entry) 
-						? { ...x, url, isUploading: false } 
-						: x
-				)
-			)
-			if (isReferenceMode) {
-				Message.success('已上传完成')
-			}
+		setMediaList(targetList)
+		await nextTick()

+		const toUpload = targetList.filter((item) => item.isUploading)
+		for (const entry of toUpload) {
 			try {
-				URL.revokeObjectURL(localPreview)
-			} catch (_) {}
-		} catch (err) {
-			setMediaList(mediaList.value.filter((x) => normalizeItemKey(x) !== normalizeItemKey(entry)))
-			Message.error('上传失败，请重试')
+				const res = await uploadFile({
+					url: PORTAL_TENCENT_COS_UPLOAD_URL,
+					file: entry._fileRef,
+					name: 'file'
+				})
+				const url = extractUploadUrlFromResponse(res)
+				if (!url) throw new Error(res?.msg || '未返回文件地址')
+
+				const localPreview = entry.url
+				setMediaList(
+					mediaList.value.map((x) =>
+						normalizeItemKey(x) === normalizeItemKey(entry) ? { ...x, url, isUploading: false } : x
+					)
+				)
+				if (isReferenceMode) {
+					Message.success('已上传完成')
+				}
+
+				try {
+					URL.revokeObjectURL(localPreview)
+				} catch (_) {}
+			} catch (err) {
+				setMediaList(mediaList.value.filter((x) => normalizeItemKey(x) !== normalizeItemKey(entry)))
+				Message.error('上传失败，请重试')
+			}
 		}
+
+		currentUploadIndex.value = -1
 	}

-	input.value = ''
-	currentUploadIndex.value = -1
-}
+	const handleSelectFiles = async (event) => {
+		const input = event.target
+		const files = Array.from(input.files || [])
+		if (!files.length) return
+		await ingestFileList(files)
+		input.value = ''
+		currentUploadIndex.value = -1
+	}
+
+	/**
+	 * Paste handler for the compose card (bound with `@paste.capture`).
+	 * Gathers files from the clipboard and hands them to ingestFileList. When the
+	 * clipboard carries no files the event is left untouched, so a normal text
+	 * paste still reaches the editor.
+	 */
+	const onComposePaste = (e) => {
+		const clipboard = e.clipboardData
+		if (!clipboard) return
+		// FileList / DataTransferItemList are array-likes; Array.from works on both.
+		let files = Array.from(clipboard.files || []).filter(Boolean)
+		if (!files.length && clipboard.items) {
+			// Fallback when `files` is empty: read file-kind items. Video and audio are
+			// accepted as well as images; ingestFileList still rejects any type that
+			// the current mode does not allow.
+			files = Array.from(clipboard.items)
+				.filter((item) => item.kind === 'file' && /^(image|video|audio)\//.test(item.type))
+				.map((item) => item.getAsFile())
+				.filter(Boolean)
+		}
+		if (!files.length) return
+		e.preventDefault()
+		ingestFileList(files)
+	}
+
+	/** Turns the drag highlight off, unless the pointer only moved to something still inside the card. */
+	const onDragLeave = (e) => {
+		const stillInsideCard = e.currentTarget.contains(e.relatedTarget)
+		if (!stillInsideCard) dragOver.value = false
+	}
+
+	/** Dragover handler: requests the copy cursor and turns the drag highlight on. */
+	const onDragOver = (e) => {
+		// dataTransfer is read defensively, the same way onDrop reads it.
+		if (e.dataTransfer) e.dataTransfer.dropEffect = 'copy'
+		dragOver.value = true
+	}
+
+	/** Drop handler: clears the drag highlight and forwards any dropped files to ingestFileList. */
+	const onDrop = (e) => {
+		dragOver.value = false
+		const dropped = e.dataTransfer?.files
+		if (!dropped || !dropped.length) return
+		ingestFileList(Array.from(dropped))
+	}

 const onPromptInput = (e) => {
 	setPrompt(e.target.value)
@ -520,42 +603,69 @@ const getUniqueRefUrlsInDoc = () => {
 	return s
 }

-/** 按文档顺序为不同 URL 分配 图1–图4，并同步 data-token / 展示 */
+/** 按文档顺序为不同 URL 分配 [图n]/[视频n]/[音频n]，并同步 data-token / 展示 */
 const renumberAllReferenceMentions = () => {
 	if (!editorRef.value || !isReference.value) return
 	const refs = Array.from(editorRef.value.querySelectorAll('.vg-inline-ref[data-mention-reference="1"]'))
-	const urlToNo = new Map()
-	let next = 1
+	const imgMap = new Map()
+	const vidMap = new Map()
+	const audMap = new Map()
+	let imgNext = 1
+	let vidNext = 1
+	let audNext = 1
 	let droppedExtra = false
 	for (const el of refs) {
 		const u = el.getAttribute('data-reference-url') || ''
+		const kind = el.getAttribute('data-reference-kind') || 'image'
 		if (!u) {
 			el.remove()
 			continue
 		}
-		if (!urlToNo.has(u)) {
-			if (next > MAX_REFERENCE_UNIQUE) {
-				el.remove()
-				droppedExtra = true
-				continue
+		let token = ''
+		if (kind === 'video') {
+			if (!vidMap.has(u)) {
+				if (vidNext > MAX_REFERENCE_UNIQUE) {
+					el.remove()
+					droppedExtra = true
+					continue
+				}
+				vidMap.set(u, vidNext++)
 			}
-			urlToNo.set(u, next++)
+			token = `[视频${vidMap.get(u)}]`
+		} else if (kind === 'audio') {
+			if (!audMap.has(u)) {
+				if (audNext > MAX_REFERENCE_UNIQUE) {
+					el.remove()
+					droppedExtra = true
+					continue
+				}
+				audMap.set(u, audNext++)
+			}
+			token = `[音频${audMap.get(u)}]`
+		} else {
+			if (!imgMap.has(u)) {
+				if (imgNext > MAX_REFERENCE_UNIQUE) {
+					el.remove()
+					droppedExtra = true
+					continue
+				}
+				imgMap.set(u, imgNext++)
+			}
+			token = `[图${imgMap.get(u)}]`
 		}
-		const n = urlToNo.get(u)
-		const token = `[图${n}]`
 		el.setAttribute('data-token', token)
-		const imgEl = el.querySelector('.vg-inline-ref-image')
-		if (imgEl) imgEl.alt = token
+		const imgEl = el.querySelector('.vg-inline-ref-image, .vg-inline-ref-video')
+		if (imgEl) imgEl.setAttribute('alt', token)
 	}
 	if (droppedExtra) {
-		Message.warning(`最多 ${MAX_REFERENCE_UNIQUE} 张不同参考图，已移除多余引用`)
+		Message.warning(`每类参考最多 ${MAX_REFERENCE_UNIQUE} 个不同素材，已移除多余引用`)
 	}
 }

 const reconcileReferenceMentions = (nextList) => {
 	if (!editorRef.value) return
 	const allowed = new Set(
-		(nextList || []).filter((x) => x?.mediaType === 'image' && x?.url).map((x) => x.url)
+		(nextList || []).filter((x) => x?.url && ['image', 'video', 'audio'].includes(x?.mediaType)).map((x) => x.url)
 	)
 	editorRef.value.querySelectorAll('.vg-inline-ref[data-mention-reference="1"]').forEach((el) => {
 		const u = el.getAttribute('data-reference-url') || ''
@ -565,43 +675,43 @@ const reconcileReferenceMentions = (nextList) => {
 	setPrompt(getEditorPlainText())
 }

-/** 文档顺序下首次出现的参考图 URL（对应 图1、图2…） */
-const collectReferenceUrlsInDocOrder = () => {
+/**
+ * Collects the reference mentions found in the editor, in document order, with
+ * one entry per distinct (kind, url) pair. renumberAllReferenceMentions numbers
+ * each kind by the first appearance of a URL, so the n-th entry of a kind
+ * returned here is the material that [图n] / [视频n] / [音频n] stands for.
+ */
+const collectReferenceMentionsInDocOrder = () => {
 	const editor = editorRef.value
 	if (!editor) return []
-	const urls = []
-	const seen = new Set()
+	const out = []
+	const seen = new Set()
 	const walk = (node) => {
 		if (node.nodeType === Node.TEXT_NODE) return
 		if (node.nodeType !== Node.ELEMENT_NODE) return
 		const el = node
 		if (el.dataset?.mentionReference === '1') {
 			const url = el.getAttribute('data-reference-url') || ''
-			if (url && !seen.has(url)) {
-				seen.add(url)
-				urls.push(url)
-			}
+			const kind = el.getAttribute('data-reference-kind') || 'image'
+			// A repeated mention of the same material must not add a second entry;
+			// otherwise later tokens of that kind would line up with the wrong item.
+			const key = `${kind}|${url}`
+			if (url && !seen.has(key)) {
+				seen.add(key)
+				out.push({ url, kind })
+			}
 			return
 		}
 		Array.from(el.childNodes).forEach(walk)
 	}
 	Array.from(editor.childNodes).forEach(walk)
-	return urls
+	return out
 }

 /**
- * 参考图模式提交用：第 1 条必须是 text（整段文案，仅含 [图n]，不含 URL）；
- * 其后按 图1→图n 顺序各一条 image_url + reference_image。
+ * 参考图模式提交用：首条 text；其后按文中 [图n]/[视频n]/[音频n] 顺序对应各 reference_*。
 */
 const getImageReferenceContentItems = () => {
 	const text = getEditorPlainText()
 	const first = { type: 'text', text: text || '' }
-	const urls = collectReferenceUrlsInDocOrder()
-	const rest = urls.map((url) => ({
-		type: 'image_url',
-		image_url: { url },
-		role: 'reference_image'
-	}))
+	const mentions = collectReferenceMentionsInDocOrder()
+	const rest = mentions.map(({ url, kind }) => {
+		if (kind === 'video') {
+			return { type: 'video_url', video_url: { url }, role: 'reference_video' }
+		}
+		if (kind === 'audio') {
+			return { type: 'audio_url', audio_url: { url }, role: 'reference_audio' }
+		}
+		return { type: 'image_url', image_url: { url }, role: 'reference_image' }
+	})
 	return [first, ...rest]
 }

@ -789,11 +899,117 @@ const onEditorKeyup = (e) => {
 	mentionActiveIndex.value = mentionVisible.value && mentionCandidates.value.length ? 0 : -1
 }

+/**
+ * Builds the non-editable inline placeholder <span> for one referenced material.
+ * The token attribute starts as '[?]'. The child element depends on `kind`:
+ * an <img> for 'image', a <video> for 'video', and a '♪' badge for anything else.
+ */
+const buildMentionHolder = (url, kind) => {
+	// Sets every attribute of `attrs` on `node` (in insertion order) and returns the node.
+	const applyAttrs = (node, attrs) => {
+		for (const [attrName, attrValue] of Object.entries(attrs)) {
+			node.setAttribute(attrName, attrValue)
+		}
+		return node
+	}
+
+	const makePreview = () => {
+		if (kind === 'image') {
+			const picture = document.createElement('img')
+			picture.src = url
+			picture.alt = ''
+			applyAttrs(picture, { draggable: 'false', contenteditable: 'false' })
+			picture.className = 'vg-inline-ref-image'
+			return picture
+		}
+		if (kind === 'video') {
+			const clip = document.createElement('video')
+			clip.src = url
+			clip.className = 'vg-inline-ref-image vg-inline-ref-video'
+			return applyAttrs(clip, { muted: '', playsinline: '', preload: 'metadata', draggable: 'false' })
+		}
+		const note = document.createElement('span')
+		note.className = 'vg-inline-ref-audio'
+		note.textContent = '♪'
+		return applyAttrs(note, { title: '音频' })
+	}
+
+	const holder = document.createElement('span')
+	holder.className = 'vg-inline-ref'
+	applyAttrs(holder, {
+		'data-mention-reference': '1',
+		'data-token': '[?]',
+		'data-reference-url': url,
+		'data-reference-kind': kind,
+		contenteditable: 'false'
+	})
+	holder.appendChild(makePreview())
+	return holder
+}
+
+/**
+ * Restores the reference-mode editor from a previously submitted task.
+ * Does nothing unless the editor element exists and the card is in reference mode.
+ *
+ * @param {{ text?: string, contentItems?: Array<object> }} payload
+ *   text         - prompt text containing [图n] / [视频n] / [音频n] tokens
+ *   contentItems - submitted content items; only image_url / video_url / audio_url
+ *                  items carrying the matching reference_* role are used
+ *
+ * The media list is replaced with the referenced materials, each token that
+ * resolves to a URL becomes an inline placeholder, and unresolved tokens stay
+ * as plain text.
+ */
+const applyReferenceFromHistory = ({ text, contentItems }) => {
+	if (!editorRef.value || !isReference.value) return
+	const items = Array.isArray(contentItems) ? contentItems : []
+
+	// Content-item type -> how to interpret it. A Map avoids matching inherited object keys.
+	const specByType = new Map([
+		['image_url', { kind: 'image', role: 'reference_image', name: '参考图', pick: (it) => it?.image_url?.url || it?.imageUrl?.url }],
+		['video_url', { kind: 'video', role: 'reference_video', name: '参考视频', pick: (it) => it?.video_url?.url || it?.videoUrl?.url }],
+		['audio_url', { kind: 'audio', role: 'reference_audio', name: '参考音频', pick: (it) => it?.audio_url?.url || it?.audioUrl?.url }]
+	])
+
+	// One pass builds both the per-kind URL pools and the media list. URLs are
+	// de-duplicated per kind because token numbers count distinct URLs; a repeated
+	// item would otherwise shift every later index of that kind.
+	const pools = { image: [], video: [], audio: [] }
+	const media = []
+	for (const it of items) {
+		const spec = it ? specByType.get(it.type) : undefined
+		if (!spec || it.role !== spec.role) continue
+		const u = spec.pick(it)
+		if (!u || pools[spec.kind].includes(u)) continue
+		pools[spec.kind].push(u)
+		media.push({ id: `hist_${media.length}`, url: u, mediaType: spec.kind, name: spec.name })
+	}
+	setMediaList(media)
+
+	const kindByLabel = { 图: 'image', 视频: 'video', 音频: 'audio' }
+	const source = String(text || '')
+	const frag = document.createDocumentFragment()
+	let cursor = 0
+	for (const match of source.matchAll(/\[(图|视频|音频)(\d+)\]/g)) {
+		const [token, label, digits] = match
+		if (match.index > cursor) {
+			frag.appendChild(document.createTextNode(source.slice(cursor, match.index)))
+		}
+		const kind = kindByLabel[label]
+		// Tokens are 1-based; an out-of-range number yields undefined and stays as text.
+		const url = pools[kind][Number.parseInt(digits, 10) - 1]
+		frag.appendChild(url ? buildMentionHolder(url, kind) : document.createTextNode(token))
+		cursor = match.index + token.length
+	}
+	if (cursor < source.length) frag.appendChild(document.createTextNode(source.slice(cursor)))
+
+	editorRef.value.innerHTML = ''
+	editorRef.value.appendChild(frag)
+	renumberAllReferenceMentions()
+	setPrompt(getEditorPlainText())
+}
+
 const selectMentionItem = (item) => {
 	if (!item?.url || !editorRef.value) return
-	const urls = getUniqueRefUrlsInDoc()
-	if (!urls.has(item.url) && urls.size >= MAX_REFERENCE_UNIQUE) {
-		Message.warning(`最多 ${MAX_REFERENCE_UNIQUE} 张不同参考图，无法再插入新图`)
+	const kind = item.mediaType === 'video' ? 'video' : item.mediaType === 'audio' ? 'audio' : 'image'
+	const uniqueByKind = { image: new Set(), video: new Set(), audio: new Set() }
+	editorRef.value.querySelectorAll('.vg-inline-ref[data-mention-reference="1"]').forEach((el) => {
+		const u = el.getAttribute('data-reference-url')
+		const k = el.getAttribute('data-reference-kind') || 'image'
+		if (u) uniqueByKind[k]?.add(u)
+	})
+	const keySet = uniqueByKind[kind] || uniqueByKind.image
+	if (!keySet.has(item.url) && keySet.size >= MAX_REFERENCE_UNIQUE) {
+		Message.warning(`该类型最多 ${MAX_REFERENCE_UNIQUE} 个不同素材`)
 		mentionVisible.value = false
 		return
 	}
@ -807,25 +1023,9 @@ const selectMentionItem = (item) => {
 	const range = selection.getRangeAt(0)
 	if (!editorRef.value.contains(range.commonAncestorContainer)) return

-	const token = '[图?]'
-	const holder = document.createElement('span')
-	holder.className = 'vg-inline-ref'
-	holder.setAttribute('data-mention-reference', '1')
-	holder.setAttribute('data-token', token)
-	holder.setAttribute('data-reference-url', item.url)
-	holder.setAttribute('contenteditable', 'false')
-
-	const img = document.createElement('img')
-	img.src = item.url
-	img.alt = ''
-	img.setAttribute('draggable', 'false')
-	img.setAttribute('contenteditable', 'false')
-	img.className = 'vg-inline-ref-image'
-
-	holder.appendChild(img)
+	const holder = buildMentionHolder(item.url, kind)

 	range.insertNode(holder)
-	// 不在引用后自动加空格，保证导出 text 为 「描述[图1]描述[图2]…」，占位与描述紧挨在用户输入的位置
 	range.setStartAfter(holder)
 	range.collapse(true)
 	selection.removeAllRanges()
@ -841,6 +1041,7 @@ const selectMentionItem = (item) => {
 defineExpose({
 	getEditorPlainText,
 	getImageReferenceContentItems,
+	applyReferenceFromHistory,
 	clearPromptOnly: () => {
 		setPrompt('')
 		if (editorRef.value) editorRef.value.innerHTML = ''
@ -861,6 +1062,13 @@ defineExpose({
 	padding: 10px;
 	box-shadow: 0 24px 64px rgba(0, 0, 0, 0.35), inset 0 1px 0 rgba(255, 255, 255, 0.06);
 	min-height: 240px;
+	transition: outline 0.15s ease, background 0.15s ease;
+}
+
+.vg-compose-card--drag {
+	outline: 2px dashed rgba(0, 202, 224, 0.55);
+	outline-offset: 2px;
+	background: rgba(0, 202, 224, 0.06);
 }

 .vg-compose-left {
@ -1052,6 +1260,17 @@ defineExpose({
 		object-fit: cover;
 		display: block;
 	}
+
+	.vg-compose-audio-badge {
+		width: 100%;
+		height: 100%;
+		display: flex;
+		align-items: center;
+		justify-content: center;
+		font-size: 28px;
+		color: rgba(0, 202, 224, 0.95);
+		background: rgba(0, 0, 0, 0.35);
+	}
 }

 .vg-compose-remove-btn {
@ -1179,7 +1398,8 @@ defineExpose({
 	user-select: none;
 }

-.vg-rich-editor :deep(.vg-inline-ref-image) {
+.vg-rich-editor :deep(.vg-inline-ref-image),
+.vg-rich-editor :deep(.vg-inline-ref-video) {
 	display: block;
 	width: auto;
 	height: auto;
@ -1187,10 +1407,43 @@ defineExpose({
 	max-height: 72px;
 	object-fit: cover;
 	border-radius: 6px;
-	vertical-align: middle;
 	pointer-events: none;
 }

+.vg-rich-editor :deep(.vg-inline-ref-audio) {
+	display: inline-flex;
+	align-items: center;
+	justify-content: center;
+	min-width: 48px;
+	min-height: 40px;
+	padding: 0 8px;
+	font-size: 18px;
+	color: rgba(0, 202, 224, 0.95);
+	background: rgba(0, 0, 0, 0.35);
+	border-radius: 8px;
+	pointer-events: none;
+}
+
+.vg-mention-kind {
+	font-size: 11px;
+	color: rgba(255, 255, 255, 0.55);
+	margin-left: 4px;
+}
+
+.vg-mention-thumb-video {
+	object-fit: cover;
+	background: #000;
+}
+
+.vg-mention-thumb-audio {
+	display: flex;
+	align-items: center;
+	justify-content: center;
+	background: rgba(0, 202, 224, 0.12);
+	color: rgba(0, 202, 224, 0.95);
+	font-size: 16px;
+}
+
 .vg-mention-panel {
 	position: absolute;
 	left: 8px;
@ -1208,6 +1461,7 @@ defineExpose({
 .vg-mention-item {
 	display: flex;
 	align-items: center;
+	flex-wrap: nowrap;
 	gap: 8px;
 	padding: 8px 10px;
 	cursor: pointer;
--- a/portal-ui/src/views/VideoGen.vue
+++ b/portal-ui/src/views/VideoGen.vue
@ -23,18 +23,36 @@
 							<template v-for="(seg, idx) in getRowPromptSegments(row)" :key="`${row.id || 'row'}_${idx}`">
 								<span v-if="seg.type === 'text'">{{ seg.text }}</span>
 								<img
-									v-else
+									v-else-if="seg.type === 'image'"
 									class="vg-chat-inline-ref-image"
 									:src="seg.url"
 									:alt="seg.token || ''" />
+								<video
+									v-else-if="seg.type === 'video'"
+									class="vg-chat-inline-ref-video"
+									:src="seg.url"
+									muted
+									playsinline
+									preload="metadata" />
+								<span v-else-if="seg.type === 'audio'" class="vg-chat-inline-audio" title="音频">♪</span>
 							</template>
 						</div>

+						<div v-if="taskParamLine(row)" class="vg-chat-task-params">
+							{{ taskParamLine(row) }}
+						</div>
+
+						<div class="vg-chat-user-actions">
+							<button type="button" class="vg-chat-action-btn" @click="reEditTask(row)">重新编辑</button>
+							<button type="button" class="vg-chat-action-btn vg-chat-action-primary" @click="regenerateTask(row)">
+								重新生成
+							</button>
+						</div>
+
 						<div class="vg-chat-time">{{ formatCreateTime(row.createTime) }}</div>
 					</div>

-					<!-- 分隔线 -->
-					<div class="vg-chat-divider"></div>
+					<div class="vg-chat-divider" aria-hidden="true"></div>

 					<!-- AI响应部分（视频结果） -->
 					<div class="vg-chat-ai-section">
@ -60,6 +78,9 @@
 							<div v-else class="vg-chat-result-link">
 								<a :href="row.result" target="_blank" rel="noreferrer">查看结果</a>
 							</div>
+							<div v-if="isVideoUrl(row.result)" class="vg-chat-download-row">
+								<button type="button" class="vg-download-btn" @click="downloadVideoUrl(row.result)">下载视频</button>
+							</div>
 						</div>

 						<div
@ -254,7 +275,8 @@ export default {
 		},
 		allowedMediaTypes() {
 			if (this.videoMode === 'image-first-frame' || this.videoMode === 'image-first-last-frame') return ['image']
-			if (this.videoMode === 'image-reference') return ['image']
+			if (this.videoMode === 'image-reference') return ['image', 'video', 'audio']
+			if (this.videoMode === 'text-to-video') return ['image', 'video', 'audio']
 			return ['image', 'video']
 		},
 		posterUrl() {
@ -589,34 +611,220 @@ export default {
 				return segs
 			}

-			// 参考图模式：按 [图n] 替换为对应 reference_image
-			const refs = this.getRowReferenceImageUrls(row)
-			if (!refs.length) {
-				return [{ type: 'text', text }]
-			}
-			const tokenReg = /\[图(\d+)\]/g
-			const segments = []
-			let last = 0
-			let m
-			while ((m = tokenReg.exec(text)) !== null) {
-				const token = m[0]
-				const idx = Number(m[1]) - 1
-				const start = m.index
-				if (start > last) {
-					segments.push({ type: 'text', text: text.slice(last, start) })
+			if (mode === 'image-reference') {
+				const { imgs, vids, auds } = this.getRowReferencePools(row)
+				const combined = /\[图(\d+)\]|\[视频(\d+)\]|\[音频(\d+)\]/g
+				const segments = []
+				let last = 0
+				let m
+				while ((m = combined.exec(text)) !== null) {
+					if (m.index > last) {
+						segments.push({ type: 'text', text: text.slice(last, m.index) })
+					}
+					if (m[1]) {
+						const idx = parseInt(m[1], 10) - 1
+						const url = imgs[idx]
+						if (url) segments.push({ type: 'image', url, token: m[0] })
+						else segments.push({ type: 'text', text: m[0] })
+					} else if (m[2]) {
+						const idx = parseInt(m[2], 10) - 1
+						const url = vids[idx]
+						if (url) segments.push({ type: 'video', url, token: m[0] })
+						else segments.push({ type: 'text', text: m[0] })
+					} else if (m[3]) {
+						const idx = parseInt(m[3], 10) - 1
+						const url = auds[idx]
+						if (url) segments.push({ type: 'audio', url, token: m[0] })
+						else segments.push({ type: 'text', text: m[0] })
+					}
+					last = m.index + m[0].length
 				}
-				const url = idx >= 0 ? refs[idx] : ''
-				if (url) {
-					segments.push({ type: 'image', url, token })
-				} else {
-					segments.push({ type: 'text', text: token })
+				if (last < text.length) {
+					segments.push({ type: 'text', text: text.slice(last) })
 				}
-				last = start + token.length
+				if (segments.length) return segments
+				const refs = this.getRowReferenceImageUrls(row)
+				if (!refs.length) {
+					return [{ type: 'text', text }]
+				}
+				const tokenReg = /\[图(\d+)\]/g
+				const segments2 = []
+				last = 0
+				while ((m = tokenReg.exec(text)) !== null) {
+					const token = m[0]
+					const idx = Number(m[1]) - 1
+					const start = m.index
+					if (start > last) {
+						segments2.push({ type: 'text', text: text.slice(last, start) })
+					}
+					const url = idx >= 0 ? refs[idx] : ''
+					if (url) {
+						segments2.push({ type: 'image', url, token })
+					} else {
+						segments2.push({ type: 'text', text: token })
+					}
+					last = start + token.length
+				}
+				if (last < text.length) {
+					segments2.push({ type: 'text', text: text.slice(last) })
+				}
+				return segments2.length ? segments2 : [{ type: 'text', text }]
 			}
-			if (last < text.length) {
-				segments.push({ type: 'text', text: text.slice(last) })
+
+			return [{ type: 'text', text }]
+		},
+
+		/**
+		 * Splits the reference items stored in row.videoParams.content into per-kind
+		 * URL lists, each in content order.
+		 * @param {object} row history row; videoParams may be a JSON string or an object
+		 * @returns {{ imgs: string[], vids: string[], auds: string[] }} empty lists when
+		 *   videoParams is missing or cannot be parsed
+		 */
+		getRowReferencePools(row) {
+			const imgs = []
+			const vids = []
+			const auds = []
+			// [图n] / [视频n] / [音频n] count distinct URLs, so a URL stored more than
+			// once must occupy a single slot; otherwise later tokens resolve to the
+			// wrong material.
+			const addUnique = (pool, u) => {
+				if (u && !pool.includes(u)) pool.push(u)
+			}
+			try {
+				const vp = typeof row.videoParams === 'string' ? JSON.parse(row.videoParams) : row.videoParams
+				const content = Array.isArray(vp?.content) ? vp.content : []
+				for (const it of content) {
+					if (!it || it.type === 'text') continue
+					if (it.type === 'image_url' && it.role === 'reference_image') {
+						addUnique(imgs, it.image_url?.url || it.imageUrl?.url)
+					} else if (it.type === 'video_url' && it.role === 'reference_video') {
+						addUnique(vids, it.video_url?.url || it.videoUrl?.url)
+					} else if (it.type === 'audio_url' && it.role === 'reference_audio') {
+						addUnique(auds, it.audio_url?.url || it.audioUrl?.url)
+					}
+				}
+			} catch (_) {
+				/* unreadable videoParams: return whatever was collected (normally nothing) */
+			}
+			return { imgs, vids, auds }
+		},
+
+		/**
+		 * Normalises a stored videoParams value: null/undefined become null, objects
+		 * are returned as they are, and anything else is JSON-parsed (null when
+		 * parsing fails).
+		 */
+		safeParseVideoParams(raw) {
+			if (raw === null || raw === undefined) return null
+			if (typeof raw !== 'object') {
+				try {
+					return JSON.parse(raw)
+				} catch (_) {
+					return null
+				}
+			}
+			return raw
+		},
+
+		/**
+		 * Resolves model / ratio / duration / resolution for a history row.
+		 * Values inside videoParams take precedence over the row's own columns:
+		 * model, ratio and resolution when truthy, duration whenever it is not
+		 * null/undefined.
+		 */
+		parseRowParams(row) {
+			const fromRow = {
+				model: row.model,
+				ratio: row.ratio,
+				duration: row.duration,
+				resolution: row.resolution
+			}
+			const vp = this.safeParseVideoParams(row.videoParams)
+			if (!vp) return fromRow
+			return {
+				model: vp.model || fromRow.model,
+				ratio: vp.ratio || fromRow.ratio,
+				duration: vp.duration ?? fromRow.duration,
+				resolution: vp.resolution || fromRow.resolution
+			}
+		},
+
+		/**
+		 * One-line summary of a row's generation parameters, joined with ' · '.
+		 * Parameters without a value are left out; the result is '' when none is set.
+		 */
+		taskParamLine(row) {
+			const { model, ratio, duration, resolution } = this.parseRowParams(row)
+			const hasDuration = duration != null && duration !== ''
+			return [
+				model ? `模型 ${model}` : '',
+				ratio ? `比例 ${ratio}` : '',
+				hasDuration ? `时长 ${duration}s` : '',
+				resolution ? `分辨率 ${resolution}` : ''
+			]
+				.filter(Boolean)
+				.join(' · ')
+		},
+
+		/**
+		 * Rebuilds a media list from the reference items in row.videoParams.content.
+		 * Ids are `c_<position in the resulting list>`; an empty list is returned when
+		 * videoParams is missing or cannot be parsed.
+		 */
+		buildMediaListFromVideoParams(row) {
+			// [content type, required role, resulting mediaType, url reader]
+			const readers = [
+				['image_url', 'reference_image', 'image', (it) => it.image_url?.url || it.imageUrl?.url],
+				['video_url', 'reference_video', 'video', (it) => it.video_url?.url || it.videoUrl?.url],
+				['audio_url', 'reference_audio', 'audio', (it) => it.audio_url?.url || it.audioUrl?.url]
+			]
+			const out = []
+			try {
+				const vp = typeof row.videoParams === 'string' ? JSON.parse(row.videoParams) : row.videoParams
+				const content = Array.isArray(vp?.content) ? vp.content : []
+				for (const it of content) {
+					if (!it || it.type === 'text') continue
+					for (const [type, role, mediaType, readUrl] of readers) {
+						if (it.type !== type || it.role !== role) continue
+						const u = readUrl(it)
+						if (u) out.push({ id: `c_${out.length}`, url: u, mediaType })
+					}
+				}
+			} catch (_) {
+				/* ignore */
+			}
+			return out
+		},
+
+		/**
+		 * Loads a history row back into the compose area: switches to the row's mode,
+		 * restores the selected model / ratio / duration / resolution, then restores
+		 * the prompt and media for that mode.
+		 * @param {object} row history row
+		 * @param {{ silent?: boolean }} [opts] silent suppresses the success toast
+		 */
+		async reEditTask(row, opts = {}) {
+			const mode = row.mode || 'text-to-video'
+			this.videoMode = mode
+			await this.$nextTick()
+
+			const { model, ratio, duration, resolution } = this.parseRowParams(row)
+			if (model) this.selectedModel = model
+			if (ratio) this.selectedRatio = ratio
+			if (duration != null && duration !== '') this.selectedDuration = Number(duration)
+			if (resolution) this.selectedResolution = resolution
+
+			const promptText = row.text || ''
+			this.promptText = promptText
+
+			if (mode === 'image-reference') {
+				const vp = this.safeParseVideoParams(row.videoParams)
+				await this.$nextTick()
+				// The compose card rebuilds its inline placeholders and its own media list.
+				this.$refs.videoComposeRef?.applyReferenceFromHistory?.({
+					text: promptText,
+					contentItems: vp?.content || []
+				})
+			} else if (mode === 'image-first-frame') {
+				this.mediaList = row.img1 ? [{ id: 'h1', url: row.img1, mediaType: 'image' }] : []
+			} else if (mode === 'image-first-last-frame') {
+				const frames = [
+					['h1', row.img1],
+					['h2', row.img2]
+				]
+				this.mediaList = frames
+					.filter(([, url]) => url)
+					.map(([id, url]) => ({ id, url, mediaType: 'image' }))
+			} else {
+				this.mediaList = this.buildMediaListFromVideoParams(row)
+			}
+
+			if (!opts.silent) this.$message?.success?.('已载入该条任务到编辑区')
+		},
+
+		/**
+		 * Re-runs a history row: loads it into the compose area through reEditTask
+		 * (silently, so no success toast is shown), waits one tick, then calls
+		 * generateVideo.
+		 */
+		async regenerateTask(row) {
+			await this.reEditTask(row, { silent: true })
+			await this.$nextTick()
+			await this.generateVideo()
+		},
+
+		/**
+		 * Triggers a download of the given video URL by clicking a temporary <a>
+		 * element; blank URLs are ignored.
+		 * NOTE(review): browsers generally honour `download` only for same-origin
+		 * URLs, so a cross-origin result may open in a new tab instead — confirm.
+		 */
+		downloadVideoUrl(url) {
+			const href = String(url || '').trim()
+			if (!href) return
+			const anchor = Object.assign(document.createElement('a'), {
+				href,
+				download: `video_${Date.now()}.mp4`,
+				target: '_blank',
+				rel: 'noreferrer'
+			})
+			document.body.appendChild(anchor)
+			anchor.click()
+			document.body.removeChild(anchor)
+		},

 		async cancelRowTask(row) {
@ -719,11 +927,28 @@ export default {
 				}
 				if (attachments.length) {
 					contentItems.push(
-						...attachments.map((item) => ({
-							type: 'image_url',
-							image_url: { url: item.url },
-							role: 'reference_image'
-						}))
+						...attachments.map((item) => {
+							const mt = item.mediaType
+							if (mt === 'video') {
+								return {
+									type: 'video_url',
+									video_url: { url: item.url },
+									role: 'reference_video'
+								}
+							}
+							if (mt === 'audio') {
+								return {
+									type: 'audio_url',
+									audio_url: { url: item.url },
+									role: 'reference_audio'
+								}
+							}
+							return {
+								type: 'image_url',
+								image_url: { url: item.url },
+								role: 'reference_image'
+							}
+						})
 					)
 				}
 				if (contentItems.length) {
@ -1457,6 +1682,96 @@ export default {
 	border: 1px solid rgba(255, 255, 255, 0.12);
 }

+.vg-chat-inline-ref-video { /* Fixed 88x50 inline box; presumably the video reference thumbnail shown inside chat text - confirm against the template */
+	display: inline-block;
+	width: 88px;
+	height: 50px;
+	object-fit: cover; /* fill the box by cropping rather than stretching the media */
+	border-radius: 6px;
+	vertical-align: middle; /* sit level with the surrounding inline content */
+	margin: 0 4px;
+	border: 1px solid rgba(255, 255, 255, 0.12);
+	background: #000; /* black backdrop behind the media */
+}
+
+.vg-chat-inline-audio { /* 36x36 cyan-tinted inline badge; presumably marks an audio reference inside chat text - confirm against the template */
+	display: inline-flex; /* inline box whose content is centred on both axes by the two rules below */
+	align-items: center;
+	justify-content: center;
+	width: 36px;
+	height: 36px;
+	margin: 0 4px;
+	vertical-align: middle; /* sit level with the surrounding inline content */
+	border-radius: 8px;
+	background: rgba(0, 202, 224, 0.15);
+	border: 1px solid rgba(0, 202, 224, 0.35);
+	font-size: 18px; /* size of the glyph rendered inside the badge */
+	color: var(--vg-cyan);
+}
+
+.vg-chat-task-params { /* Small muted text inside a bordered, rounded, dark translucent panel; presumably lists a task's generation parameters - confirm */
+	margin-top: 10px;
+	font-size: 12px;
+	line-height: 1.5;
+	color: var(--vg-muted);
+	padding: 8px 10px;
+	background: rgba(0, 0, 0, 0.2);
+	border-radius: 10px;
+	border: 1px solid var(--vg-border);
+}
+
+.vg-chat-user-actions { /* Wrapping flex row with an 8px gap between its children */
+	display: flex;
+	flex-wrap: wrap; /* children flow onto additional rows when the line is full */
+	gap: 8px;
+	margin-top: 10px;
+}
+
+.vg-chat-action-btn { /* Compact muted button with a faint translucent fill */
+	font-size: 12px;
+	padding: 5px 12px;
+	border-radius: 8px;
+	border: 1px solid var(--vg-border);
+	background: rgba(255, 255, 255, 0.06);
+	color: var(--vg-muted);
+	cursor: pointer;
+}
+
+.vg-chat-action-btn:hover { /* Hover: switch to the normal text colour and a cyan-tinted border */
+	color: var(--vg-text);
+	border-color: rgba(0, 202, 224, 0.35);
+}
+
+.vg-chat-action-primary { /* Emphasised variant: cyan text and cyan-tinted border even when not hovered */
+	color: var(--vg-cyan);
+	border-color: rgba(0, 202, 224, 0.35);
+}
+
+.vg-chat-divider { /* Zero height, no margin, no border: the element draws no line and adds no height of its own */
+	height: 0;
+	margin: 0;
+	border: none;
+}
+
+.vg-chat-download-row { /* Adds 12px of space above the row; presumably wraps the download button - confirm */
+	margin-top: 12px;
+}
+
+.vg-download-btn { /* Cyan-accented button: tinted fill, stronger cyan border, bold label */
+	font-size: 13px;
+	padding: 8px 16px;
+	border-radius: 10px;
+	border: 1px solid rgba(0, 202, 224, 0.45);
+	background: rgba(0, 202, 224, 0.12);
+	color: var(--vg-cyan);
+	font-weight: 600;
+	cursor: pointer;
+}
+
+.vg-download-btn:hover { /* Hover: raise the fill opacity from 0.12 to 0.2 */
+	background: rgba(0, 202, 224, 0.2);
+}
+
 .vg-chat-time {
 	margin-top: 8px;
 	font-size: 12px;