Commit c98e4caf by 陶然

新增图片解析job

parent 423b2476
......@@ -42,7 +42,7 @@ namespace Njust.Pdf.Analysis.Entities
public string Keywords { get; set; }
[ApiMember(Description = "发表日期")]
public DateTime? PublishTime { get; set; }
public string PublishTime { get; set; }
[ApiMember(Description = "机构")]
[StringLength(500)]
......@@ -120,9 +120,6 @@ namespace Njust.Pdf.Analysis.Entities
public string DomainName { get; set; }
}
/// <summary>
/// 导入系统:0 全文解析:25 确认后:50 复核后:100
/// </summary>
[Alias(Configs.TableAnalyseName)]
[Api(Description ="文献解析表")]
public class Analyse :AnalyseBase,IEntityInAssemblyDb
......@@ -132,9 +129,6 @@ namespace Njust.Pdf.Analysis.Entities
}
/// <summary>
/// 导入系统:0 全文解析:25 确认后:50 复核后:100
/// </summary>
[Alias(Configs.TableAnalyseName)]
[Api(Description = "文献解析表")]
public class AnalyseAll : AnalyseBase, IEntityInAssemblyDb
......
......@@ -50,9 +50,9 @@ namespace Njust.Pdf.Analysis.Jobs
try
{
var query = conn.From<AnalyseAll>();
query.Take(10);
query.Take(5);
query.Skip(0);
query.Where(o => o.Status < 4);//如果异常了,会重试
query.Where(o => o.IsAnlysis == false);//如果异常了,会重试
query.OrderBy(o => o.CreateTime).ThenBy(o => o.Kvid);
var entities = conn.Select(query);
if (entities.IsNullOrEmpty())
......@@ -77,21 +77,99 @@ namespace Njust.Pdf.Analysis.Jobs
text = regex.Replace(text, "");
item.Allbody = text;
item.AddOnlyProperties(o => o.Allbody);
//item.AnlysisTime = DateTime.Now;
//item.AddOnlyProperties(o => o.AnlysisTime);
//item.AnlysisUserName = KiviiContext.CurrentMember.FullName;
//item.AddOnlyProperties(o => o.AnlysisUserName);
//item.IsAnlysis = true;
//item.AddOnlyProperties(o => o.IsAnlysis);
item.Status = 25;//解析了全文 后状态为50
item.AddOnlyProperties(o => o.Status);
item.AnlysisTime = DateTime.Now;
item.AddOnlyProperties(o => o.AnlysisTime);
item.AnlysisUserName = KiviiContext.CurrentMember.FullName;
item.AddOnlyProperties(o => o.AnlysisUserName);
item.IsAnlysis = true;
item.AddOnlyProperties(o => o.IsAnlysis);
conn.UpdateOnly(item);
succeed++;
}
catch (Exception ex)
{
TaskContext.Message = $"处理消息异常:共处理{succeed}/{total},Kvid:{item.Kvid},Ex:{ex.Message}";
item.Status += 1;
item.AddOnlyProperties(o => o.Status);
conn.UpdateOnly(item);
return false;
}
}
}
catch (Exception ex)
{
TaskContext.Message = $"处理消息异常:共处理{succeed}/{total},Ex:{ex.Message}";
return false;
}
TaskContext.Message = $"完成:共处理{succeed}/{total}";
return true;
}
}
public class AnalyseImageJob : IJob
{
/// <summary>
/// Display name of this job (the literal is Chinese for "extract the images from a PDF").
/// </summary>
public string Name => "抓取PDF中的图片";
/// <summary>
/// Human-readable description of this job; currently the same text as <see cref="Name"/>.
/// </summary>
public string Description => "抓取PDF中的图片";
/// <summary>
/// Context object for the current run; this job writes its progress and error text to <c>TaskContext.Message</c>.
/// </summary>
public IJobContext TaskContext { get; set; }
/// <summary>
/// Intentionally empty: this method releases nothing.
/// </summary>
public void Dispose() { }
/// <summary>
/// Entry point of the job: runs the PDF image extraction pass.
/// </summary>
/// <returns>Whatever <c>pdfImage()</c> returns.</returns>
public bool Execution() => pdfImage();
private bool pdfImage()
{
int succeed = 0, total = 0;
var conn = KiviiContext.GetOpenedDbConnection<AnalyseAll>();
try
{
var query = conn.From<AnalyseAll>();
query.Take(2);
query.Skip(0);
query.Where(o => o.Status < 4);//如果异常了,会重试
query.OrderBy(o => o.CreateTime).ThenBy(o => o.Kvid);
var entities = conn.Select(query);
if (entities.IsNullOrEmpty())
{
TaskContext.Message = "无数据需要处理";
return true;
}
total = entities.Count;
var queryPdfEntityFile = conn.From<EntityDbFile<Analyse>>();
queryPdfEntityFile.Where(o => o.ParentKvid == Guid.Empty & o.DbFolderPath == "/Pdf/Analysis" & o.Extension.ToLower() == ".pdf" & Sql.In(o.OwnerKvid, entities.ConvertAll(p => p.Kvid)));
var pdfEntityFiles = conn.Select(queryPdfEntityFile);
foreach (var item in entities)
{
try
{
var pdfFile = pdfEntityFiles.FirstOrDefault(o => o.OwnerKvid == item.Kvid);
if (pdfFile == null) throw new Exception($"未找Pdf文件:{item.Title},HashCode:{item.HashCode}");
var physicalStorageFilePath = pdfFile.GetPhysicalPath();
if (!File.Exists(physicalStorageFilePath)) throw new Exception($"未找Pdf文件:{item.Title},HashCode:{item.HashCode}");
#region 图片
var imgPath = $"/Storages/Pdf/Analysis/Images/{item.Kvid}";
var reader = new PdfReader(physicalStorageFilePath);
var fileName = pdfFile.Name.Replace(pdfFile.Extension, "");
for (var i = 0; i < reader.XrefSize; i++)
{
var pdfObj = reader.GetPdfObject(i);
......@@ -105,7 +183,7 @@ namespace Njust.Pdf.Analysis.Jobs
{
var ms = new MemoryStream(bytes);
ms.Position = 0;
KiviiContext.VirtualFiles.WriteFile($"{imgPath}/{i}.jpg", ms);
KiviiContext.VirtualFiles.WriteFile($"{imgPath}/{fileName}_{i}.jpg", ms);
//var img = System.Drawing.Image.FromStream(ms);
//img.Save(Path.Combine(tbxExportImagesPath.Text, $"{i}.jpg"), System.Drawing.Imaging.ImageFormat.Jpeg);
......@@ -114,10 +192,15 @@ namespace Njust.Pdf.Analysis.Jobs
{ }
}
#endregion
item.Status = int.MaxValue;
item.AddOnlyProperties(o => o.Status);
conn.UpdateOnly(item);
succeed++;
}
catch (Exception ex)
{
TaskContext.Message = $"处理消息异常:共处理{succeed}/{total},Ex:{ex.Message}";
TaskContext.Message = $"处理消息异常:共处理{succeed}/{total},Kvid:{item.Kvid},Ex:{ex.Message}";
item.Status += 1;
item.AddOnlyProperties(o => o.Status);
conn.UpdateOnly(item);
......
......@@ -44,6 +44,28 @@ namespace Njust.Pdf.Analysis.Tranforms
#endregion
/// <summary>
/// Loads a single <c>Analyse</c> record by its Kvid and hands it to the
/// "Template.AnalysisPDF" view.
/// </summary>
[RequiresAnyRole(SystemRoles.Everyone)]
[Route("/Analysis/PreCheck/{Kvid}")]
public class AnalysePreCheck : RestfulExecution<Analyse>
{
    /// <summary>
    /// Identifier of the record to load; corresponds to the {Kvid} placeholder in the route.
    /// </summary>
    public Guid Kvid { get; set; }

    /// <summary>
    /// Returns the matching record wrapped in an <c>HttpResult</c>, or a NotFound error
    /// when the Kvid is empty or no record exists for it.
    /// </summary>
    public override object OnExecution(IRequest req, IResponse res)
    {
        // An empty Guid means no usable identifier was supplied.
        if (Kvid == Guid.Empty)
        {
            return HttpError.NotFound("Need Kvid!");
        }

        var connection = KiviiContext.GetOpenedDbConnection<Analyse>();
        var record = connection.SingleById<Analyse>(Kvid);
        if (record == null)
        {
            return HttpError.NotFound("This Item is Unannounced!");
        }

        // The record is the response payload; the view name selects the template.
        var result = new HttpResult();
        result.Response = record;
        result.View = "Template.AnalysisPDF";
        return result;
    }
}
[RequiresAnyRole(SystemRoles.Everyone)]
public class AnalyseImport : RestfulExecution<Analyse>
{
public string DomainName { get; set; }
......@@ -141,8 +163,6 @@ namespace Njust.Pdf.Analysis.Tranforms
exist.AddOnlyProperties(o => o.AnlysisUserName);
exist.IsAnlysis = true;
exist.AddOnlyProperties(o => o.IsAnlysis);
exist.Status = 50;//解析了全文 后状态为50
exist.AddOnlyProperties(o => o.Status);
conn.UpdateOnly(exist);
exist.RemoveAllOnlyProperties();
rtns.Results.Add(exist);
......@@ -248,7 +268,7 @@ namespace Njust.Pdf.Analysis.Tranforms
if (!item.Category.IsNullOrEmpty()) item.AddOnlyProperties(o => o.Category);
if (!item.Source.IsNullOrEmpty()) item.AddOnlyProperties(o => o.Source);
if (item.PublishTime != null) item.AddOnlyProperties(o => o.PublishTime);
if (!item.PublishTime.IsNullOrEmpty()) item.AddOnlyProperties(o => o.PublishTime);
if (!item.Country.IsNullOrEmpty()) item.AddOnlyProperties(o => o.Country);
if (!item.Fund.IsNullOrEmpty()) item.AddOnlyProperties(o => o.Fund);
if (!item.Abstract.IsNullOrEmpty()) item.AddOnlyProperties(o => o.Abstract);
......@@ -259,8 +279,6 @@ namespace Njust.Pdf.Analysis.Tranforms
item.IsChecked = true;
item.AddOnlyProperties(o => o.IsChecked);
if (!item.Language.IsNullOrEmpty()) item.AddOnlyProperties(o => o.Language);
item.Status = 100;//解析了全文 后状态为50
item.AddOnlyProperties(o => o.Status);
conn.UpdateOnly(item);
item.RemoveAllOnlyProperties();
rtns.Results.Add(item);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment